Diffstat (limited to 'libgo/go/text/template/parse/lex.go')
-rw-r--r--  libgo/go/text/template/parse/lex.go  240
1 file changed, 139 insertions, 101 deletions
diff --git a/libgo/go/text/template/parse/lex.go b/libgo/go/text/template/parse/lex.go
index c4e1a56a8d4..23c0cf0793c 100644
--- a/libgo/go/text/template/parse/lex.go
+++ b/libgo/go/text/template/parse/lex.go
@@ -13,8 +13,9 @@ import (
 
 // item represents a token or text string returned from the scanner.
 type item struct {
-	typ itemType
-	val string
+	typ itemType // The type of this item.
+	pos Pos      // The starting position, in bytes, of this item in the input string.
+	val string   // The value of this item.
 }
 
 func (i item) String() string {
@@ -42,65 +43,32 @@ const (
 	itemComplex     // complex constant (1+2i); imaginary is just a number
 	itemColonEquals // colon-equals (':=') introducing a declaration
 	itemEOF
-	itemField      // alphanumeric identifier, starting with '.', possibly chained ('.x.y')
-	itemIdentifier // alphanumeric identifier
+	itemField      // alphanumeric identifier starting with '.'
+	itemIdentifier // alphanumeric identifier not starting with '.'
 	itemLeftDelim  // left action delimiter
+	itemLeftParen  // '(' inside action
 	itemNumber     // simple number, including imaginary
 	itemPipe       // pipe symbol
 	itemRawString  // raw quoted string (includes quotes)
 	itemRightDelim // right action delimiter
+	itemRightParen // ')' inside action
+	itemSpace      // run of spaces separating arguments
 	itemString     // quoted string (includes quotes)
 	itemText       // plain text
-	itemVariable   // variable starting with '$', such as '$' or '$1' or '$hello'.
+	itemVariable   // variable starting with '$', such as '$' or '$1' or '$hello'
 	// Keywords appear after all the rest.
 	itemKeyword  // used only to delimit the keywords
-	itemDot      // the cursor, spelled '.'.
+	itemDot      // the cursor, spelled '.'
 	itemDefine   // define keyword
 	itemElse     // else keyword
 	itemEnd      // end keyword
 	itemIf       // if keyword
+	itemNil      // the untyped nil constant, easiest to treat as a keyword
 	itemRange    // range keyword
 	itemTemplate // template keyword
 	itemWith     // with keyword
 )
 
-// Make the types prettyprint.
-var itemName = map[itemType]string{
-	itemError:        "error",
-	itemBool:         "bool",
-	itemChar:         "char",
-	itemCharConstant: "charconst",
-	itemComplex:      "complex",
-	itemColonEquals:  ":=",
-	itemEOF:          "EOF",
-	itemField:        "field",
-	itemIdentifier:   "identifier",
-	itemLeftDelim:    "left delim",
-	itemNumber:       "number",
-	itemPipe:         "pipe",
-	itemRawString:    "raw string",
-	itemRightDelim:   "right delim",
-	itemString:       "string",
-	itemVariable:     "variable",
-	// keywords
-	itemDot:      ".",
-	itemDefine:   "define",
-	itemElse:     "else",
-	itemIf:       "if",
-	itemEnd:      "end",
-	itemRange:    "range",
-	itemTemplate: "template",
-	itemWith:     "with",
-}
-
-func (i itemType) String() string {
-	s := itemName[i]
-	if s == "" {
-		return fmt.Sprintf("item%d", int(i))
-	}
-	return s
-}
-
 var key = map[string]itemType{
 	".":        itemDot,
 	"define":   itemDefine,
@@ -108,6 +76,7 @@ var key = map[string]itemType{
 	"end":      itemEnd,
 	"if":       itemIf,
 	"range":    itemRange,
+	"nil":      itemNil,
 	"template": itemTemplate,
 	"with":     itemWith,
 }
@@ -119,24 +88,27 @@ type stateFn func(*lexer) stateFn
 
 // lexer holds the state of the scanner.
 type lexer struct {
-	name       string    // the name of the input; used only for error reports.
-	input      string    // the string being scanned.
-	leftDelim  string    // start of action.
-	rightDelim string    // end of action.
-	state      stateFn   // the next lexing function to enter.
-	pos        int       // current position in the input.
-	start      int       // start position of this item.
-	width      int       // width of last rune read from input.
-	items      chan item // channel of scanned items.
+	name       string    // the name of the input; used only for error reports
+	input      string    // the string being scanned
+	leftDelim  string    // start of action
+	rightDelim string    // end of action
+	state      stateFn   // the next lexing function to enter
+	pos        Pos       // current position in the input
+	start      Pos       // start position of this item
+	width      Pos       // width of last rune read from input
+	lastPos    Pos       // position of most recent item returned by nextItem
+	items      chan item // channel of scanned items
+	parenDepth int       // nesting depth of ( ) exprs
 }
 
 // next returns the next rune in the input.
-func (l *lexer) next() (r rune) {
-	if l.pos >= len(l.input) {
+func (l *lexer) next() rune {
+	if int(l.pos) >= len(l.input) {
 		l.width = 0
 		return eof
 	}
-	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
+	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
+	l.width = Pos(w)
 	l.pos += l.width
 	return r
 }
@@ -155,7 +127,7 @@ func (l *lexer) backup() {
 
 // emit passes an item back to the client.
 func (l *lexer) emit(t itemType) {
-	l.items <- item{t, l.input[l.start:l.pos]}
+	l.items <- item{t, l.start, l.input[l.start:l.pos]}
 	l.start = l.pos
 }
 
@@ -180,30 +152,25 @@ func (l *lexer) acceptRun(valid string) {
 	l.backup()
 }
 
-// lineNumber reports which line we're on. Doing it this way
+// lineNumber reports which line we're on, based on the position of
+// the previous item returned by nextItem. Doing it this way
 // means we don't have to worry about peek double counting.
 func (l *lexer) lineNumber() int {
-	return 1 + strings.Count(l.input[:l.pos], "\n")
+	return 1 + strings.Count(l.input[:l.lastPos], "\n")
}
 
-// error returns an error token and terminates the scan by passing
+// errorf returns an error token and terminates the scan by passing
 // back a nil pointer that will be the next state, terminating l.nextItem.
 func (l *lexer) errorf(format string, args ...interface{}) stateFn {
-	l.items <- item{itemError, fmt.Sprintf(format, args...)}
+	l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)}
 	return nil
 }
 
 // nextItem returns the next item from the input.
 func (l *lexer) nextItem() item {
-	for {
-		select {
-		case item := <-l.items:
-			return item
-		default:
-			l.state = l.state(l)
-		}
-	}
-	panic("not reached")
+	item := <-l.items
+	l.lastPos = item.pos
+	return item
 }
 
 // lex creates a new scanner for the input string.
@@ -219,12 +186,19 @@ func lex(name, input, left, right string) *lexer {
 		input:      input,
 		leftDelim:  left,
 		rightDelim: right,
-		state:      lexText,
-		items:      make(chan item, 2), // Two items of buffering is sufficient for all state functions
+		items:      make(chan item),
 	}
+	go l.run()
 	return l
 }
 
+// run runs the state machine for the lexer.
+func (l *lexer) run() {
+	for l.state = lexText; l.state != nil; {
+		l.state = l.state(l)
+	}
+}
+
 // state functions
 
 const (
@@ -257,29 +231,30 @@ func lexText(l *lexer) stateFn {
 
 // lexLeftDelim scans the left delimiter, which is known to be present.
 func lexLeftDelim(l *lexer) stateFn {
-	l.pos += len(l.leftDelim)
+	l.pos += Pos(len(l.leftDelim))
 	if strings.HasPrefix(l.input[l.pos:], leftComment) {
 		return lexComment
 	}
 	l.emit(itemLeftDelim)
+	l.parenDepth = 0
 	return lexInsideAction
 }
 
 // lexComment scans a comment. The left comment marker is known to be present.
 func lexComment(l *lexer) stateFn {
-	l.pos += len(leftComment)
+	l.pos += Pos(len(leftComment))
 	i := strings.Index(l.input[l.pos:], rightComment+l.rightDelim)
 	if i < 0 {
 		return l.errorf("unclosed comment")
 	}
-	l.pos += i + len(rightComment) + len(l.rightDelim)
+	l.pos += Pos(i + len(rightComment) + len(l.rightDelim))
 	l.ignore()
 	return lexText
 }
 
 // lexRightDelim scans the right delimiter, which is known to be present.
 func lexRightDelim(l *lexer) stateFn {
-	l.pos += len(l.rightDelim)
+	l.pos += Pos(len(l.rightDelim))
 	l.emit(itemRightDelim)
 	return lexText
 }
@@ -287,16 +262,19 @@ func lexRightDelim(l *lexer) stateFn {
 // lexInsideAction scans the elements inside action delimiters.
 func lexInsideAction(l *lexer) stateFn {
 	// Either number, quoted string, or identifier.
-	// Spaces separate and are ignored.
+	// Spaces separate arguments; runs of spaces turn into itemSpace.
 	// Pipe symbols separate and are emitted.
 	if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
-		return lexRightDelim
+		if l.parenDepth == 0 {
+			return lexRightDelim
+		}
+		return l.errorf("unclosed left paren")
 	}
 	switch r := l.next(); {
-	case r == eof || r == '\n':
+	case r == eof || isEndOfLine(r):
 		return l.errorf("unclosed action")
 	case isSpace(r):
-		l.ignore()
+		return lexSpace
 	case r == ':':
 		if l.next() != '=' {
 			return l.errorf("expected :=")
@@ -309,15 +287,15 @@ func lexInsideAction(l *lexer) stateFn {
 	case r == '`':
 		return lexRawQuote
 	case r == '$':
-		return lexIdentifier
+		return lexVariable
 	case r == '\'':
 		return lexChar
 	case r == '.':
 		// special look-ahead for ".field" so we don't break l.backup().
-		if l.pos < len(l.input) {
+		if l.pos < Pos(len(l.input)) {
 			r := l.input[l.pos]
 			if r < '0' || '9' < r {
-				return lexIdentifier // itemDot comes from the keyword table.
+				return lexField
 			}
 		}
 		fallthrough // '.' can start a number.
@@ -327,6 +305,17 @@ func lexInsideAction(l *lexer) stateFn {
 	case isAlphaNumeric(r):
 		l.backup()
 		return lexIdentifier
+	case r == '(':
+		l.emit(itemLeftParen)
+		l.parenDepth++
+		return lexInsideAction
+	case r == ')':
+		l.emit(itemRightParen)
+		l.parenDepth--
+		if l.parenDepth < 0 {
+			return l.errorf("unexpected right paren %#U", r)
+		}
+		return lexInsideAction
 	case r <= unicode.MaxASCII && unicode.IsPrint(r):
 		l.emit(itemChar)
 		return lexInsideAction
@@ -336,28 +325,34 @@ func lexInsideAction(l *lexer) stateFn {
 	return lexInsideAction
 }
 
-// lexIdentifier scans an alphanumeric or field.
+// lexSpace scans a run of space characters.
+// One space has already been seen.
+func lexSpace(l *lexer) stateFn {
+	for isSpace(l.peek()) {
+		l.next()
+	}
+	l.emit(itemSpace)
+	return lexInsideAction
+}
+
+// lexIdentifier scans an alphanumeric.
 func lexIdentifier(l *lexer) stateFn {
 Loop:
 	for {
 		switch r := l.next(); {
 		case isAlphaNumeric(r):
 			// absorb.
-		case r == '.' && (l.input[l.start] == '.' || l.input[l.start] == '$'):
-			// field chaining; absorb into one token.
 		default:
 			l.backup()
 			word := l.input[l.start:l.pos]
 			if !l.atTerminator() {
-				return l.errorf("unexpected character %+U", r)
+				return l.errorf("bad character %#U", r)
 			}
 			switch {
 			case key[word] > itemKeyword:
 				l.emit(key[word])
 			case word[0] == '.':
 				l.emit(itemField)
-			case word[0] == '$':
-				l.emit(itemVariable)
 			case word == "true", word == "false":
 				l.emit(itemBool)
 			default:
@@ -369,17 +364,59 @@ Loop:
 	return lexInsideAction
 }
 
+// lexField scans a field: .Alphanumeric.
+// The . has been scanned.
+func lexField(l *lexer) stateFn {
+	return lexFieldOrVariable(l, itemField)
+}
+
+// lexVariable scans a Variable: $Alphanumeric.
+// The $ has been scanned.
+func lexVariable(l *lexer) stateFn {
+	if l.atTerminator() { // Nothing interesting follows -> "$".
+		l.emit(itemVariable)
+		return lexInsideAction
+	}
+	return lexFieldOrVariable(l, itemVariable)
+}
+
+// lexVariable scans a field or variable: [.$]Alphanumeric.
+// The . or $ has been scanned.
+func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
+	if l.atTerminator() { // Nothing interesting follows -> "." or "$".
+		if typ == itemVariable {
+			l.emit(itemVariable)
+		} else {
+			l.emit(itemDot)
+		}
+		return lexInsideAction
+	}
+	var r rune
+	for {
+		r = l.next()
+		if !isAlphaNumeric(r) {
+			l.backup()
+			break
+		}
+	}
+	if !l.atTerminator() {
+		return l.errorf("bad character %#U", r)
+	}
+	l.emit(typ)
+	return lexInsideAction
+}
+
 // atTerminator reports whether the input is at valid termination character to
-// appear after an identifier. Mostly to catch cases like "$x+2" not being
-// acceptable without a space, in case we decide one day to implement
-// arithmetic.
+// appear after an identifier. Breaks .X.Y into two pieces. Also catches cases
+// like "$x+2" not being acceptable without a space, in case we decide one
+// day to implement arithmetic.
 func (l *lexer) atTerminator() bool {
 	r := l.peek()
-	if isSpace(r) {
+	if isSpace(r) || isEndOfLine(r) {
 		return true
 	}
 	switch r {
-	case eof, ',', '|', ':':
+	case eof, '.', ',', '|', ':', ')', '(':
 		return true
 	}
 	// Does r start the delimiter? This can be ambiguous (with delim=="//", $x/2 will
@@ -392,7 +429,7 @@ func (l *lexer) atTerminator() bool {
 }
 
 // lexChar scans a character constant. The initial quote is already
-// scanned. Syntax checking is done by the parse.
+// scanned. Syntax checking is done by the parser.
 func lexChar(l *lexer) stateFn {
 Loop:
 	for {
@@ -412,7 +449,7 @@ Loop:
 	return lexInsideAction
 }
 
-// lexNumber scans a number: decimal, octal, hex, float, or imaginary.  This
+// lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
 // isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
 // and "089" - but when it's wrong the input is invalid and the parser (via
 // strconv) will notice.
@@ -421,7 +458,7 @@ func lexNumber(l *lexer) stateFn {
 		return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
 	}
 	if sign := l.peek(); sign == '+' || sign == '-' {
-		// Complex: 1+2i.  No spaces, must end in 'i'.
+		// Complex: 1+2i. No spaces, must end in 'i'.
 		if !l.scanNumber() || l.input[l.pos-1] != 'i' {
 			return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
 		}
@@ -495,11 +532,12 @@ Loop:
 
 // isSpace reports whether r is a space character.
 func isSpace(r rune) bool {
-	switch r {
-	case ' ', '\t', '\n', '\r':
-		return true
-	}
-	return false
+	return r == ' ' || r == '\t'
+}
+
+// isEndOfLine reports whether r is an end-of-line character.
+func isEndOfLine(r rune) bool {
+	return r == '\r' || r == '\n'
 }
 
 // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
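A usage note, not part of the patch: after this change lex starts the lexing goroutine itself and items is an unbuffered channel, so whoever creates a lexer must keep calling nextItem until it sees itemEOF or itemError, or the goroutine stays blocked on its next send. The sketch below shows one way to drain the lexer and recover a line number from the new pos field, mirroring what lineNumber does. It assumes it is compiled inside the parse package (lex, item, and the item constants are unexported), and the helper name drainExample is made up for illustration.

package parse

import (
	"fmt"
	"strings"
)

// drainExample is illustrative only and not part of the change: it runs the
// lexer over input and prints every item together with the line it starts
// on, computed from item.pos the same way lineNumber computes it.
func drainExample(input string) {
	l := lex("example", input, "{{", "}}") // name, input, left delimiter, right delimiter
	for {
		it := l.nextItem() // blocks until the lexing goroutine sends the next item
		line := 1 + strings.Count(input[:it.pos], "\n")
		fmt.Printf("line %d: %s\n", line, it) // item's String method renders the value
		if it.typ == itemEOF || it.typ == itemError {
			break
		}
	}
}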