summary | refs | log | tree | commit | diff
path: root/libgo/go/go/scanner/scanner.go
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/go/scanner/scanner.go')
-rw-r--r-- libgo/go/go/scanner/scanner.go 165
1 files changed, 80 insertions, 85 deletions
diff --git a/libgo/go/go/scanner/scanner.go b/libgo/go/go/scanner/scanner.go
index 64ff127750d..8c3205230e8 100644
--- a/libgo/go/go/scanner/scanner.go
+++ b/libgo/go/go/scanner/scanner.go
@@ -4,13 +4,25 @@
// A scanner for Go source text. Takes a []byte as source which can
// then be tokenized through repeated calls to the Scan function.
-// For a sample use of a scanner, see the implementation of Tokenize.
+// Typical use:
+//
+// var s Scanner
+// fset := token.NewFileSet() // position information is relative to fset
+// s.Init(fset.AddFile(filename, fset.Base(), len(src)), src, nil /* no error handler */, 0)
+// for {
+// pos, tok, lit := s.Scan()
+// if tok == token.EOF {
+// break
+// }
+// // do something here with pos, tok, and lit
+// }
//
package scanner
import (
"bytes"
"go/token"
+ "path"
"strconv"
"unicode"
"utf8"
@@ -19,23 +31,21 @@ import (
// A Scanner holds the scanner's internal state while processing
// a given text. It can be allocated as part of another data
-// structure but must be initialized via Init before use. For
-// a sample use, see the implementation of Tokenize.
+// structure but must be initialized via Init before use.
//
type Scanner struct {
// immutable state
+ file *token.File // source file handle
+ dir string // directory portion of file.Name()
src []byte // source
err ErrorHandler // error reporting; or nil
mode uint // scanning mode
// scanning state
- filename string // current filename; may change via //line filename:line comment
- line int // current line
- column int // current column
-
ch int // current character
offset int // character offset
rdOffset int // reading offset (position after current character)
+ lineOffset int // current line offset
insertSemi bool // insert a semicolon before next newline
// public state - ok to modify
@@ -47,22 +57,21 @@ type Scanner struct {
// S.ch < 0 means end-of-file.
//
func (S *Scanner) next() {
- S.column++
if S.rdOffset < len(S.src) {
S.offset = S.rdOffset
if S.ch == '\n' {
- S.line++
- S.column = 1
+ S.lineOffset = S.offset
+ S.file.AddLine(S.offset)
}
r, w := int(S.src[S.rdOffset]), 1
switch {
case r == 0:
- S.error("illegal character NUL")
+ S.error(S.offset, "illegal character NUL")
case r >= 0x80:
// not ASCII
r, w = utf8.DecodeRune(S.src[S.rdOffset:])
if r == utf8.RuneError && w == 1 {
- S.error("illegal UTF-8 encoding")
+ S.error(S.offset, "illegal UTF-8 encoding")
}
}
S.rdOffset += w
@@ -70,7 +79,8 @@ func (S *Scanner) next() {
} else {
S.offset = len(S.src)
if S.ch == '\n' {
- S.column = 1
+ S.lineOffset = S.offset
+ S.file.AddLine(S.offset)
}
S.ch = -1 // eof
}
@@ -86,28 +96,36 @@ const (
InsertSemis // automatically insert semicolons
)
-
-// Init prepares the scanner S to tokenize the text src. Calls to Scan
-// will use the error handler err if they encounter a syntax error and
-// err is not nil. Also, for each error encountered, the Scanner field
-// ErrorCount is incremented by one. The filename parameter is used as
-// filename in the token.Position returned by Scan for each token. The
-// mode parameter determines how comments and illegal characters are
-// handled.
+// Init prepares the scanner S to tokenize the text src by setting the
+// scanner at the beginning of src. The scanner uses file, a file of a file
+// set, for position information and adds line information to it for each line.
+// It is ok to re-use the same file when re-scanning the same file as
+// line information which is already present is ignored. Init causes a
+// panic if the file size does not match the src size.
+//
+// Calls to Scan will use the error handler err if they encounter a
+// syntax error and err is not nil. Also, for each error encountered,
+// the Scanner field ErrorCount is incremented by one. The mode parameter
+// determines how comments, illegal characters, and semicolons are handled.
+//
+// Note that Init may call err if there is an error in the first character
+// of the file.
//
-func (S *Scanner) Init(filename string, src []byte, err ErrorHandler, mode uint) {
+func (S *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode uint) {
// Explicitly initialize all fields since a scanner may be reused.
+ if file.Size() != len(src) {
+ panic("file size does not match src len")
+ }
+ S.file = file
+ S.dir, _ = path.Split(file.Name())
S.src = src
S.err = err
S.mode = mode
- S.filename = filename
- S.line = 1
- S.column = 0
-
S.ch = ' '
S.offset = 0
S.rdOffset = 0
+ S.lineOffset = 0
S.insertSemi = false
S.ErrorCount = 0
@@ -145,14 +163,9 @@ func charString(ch int) string {
}
-func (S *Scanner) error(msg string) {
- S.errorAt(token.Position{S.filename, S.offset, S.line, S.column}, msg)
-}
-
-
-func (S *Scanner) errorAt(pos token.Position, msg string) {
+func (S *Scanner) error(offs int, msg string) {
if S.err != nil {
- S.err.Error(pos, msg)
+ S.err.Error(S.file.Position(S.file.Pos(offs)), msg)
}
S.ErrorCount++
}
@@ -166,9 +179,13 @@ func (S *Scanner) interpretLineComment(text []byte) {
if i := bytes.Index(text, []byte{':'}); i > 0 {
if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 {
// valid //line filename:line comment;
+ filename := path.Clean(string(text[len(prefix):i]))
+ if filename[0] != '/' {
+ // make filename relative to current directory
+ filename = path.Join(S.dir, filename)
+ }
// update scanner position
- S.filename = string(text[len(prefix):i])
- S.line = line - 1 // -1 since the '\n' has not been consumed yet
+ S.file.AddLineInfo(S.lineOffset, filename, line-1) // -1 since comment applies to next line
}
}
}
@@ -178,8 +195,6 @@ func (S *Scanner) interpretLineComment(text []byte) {
func (S *Scanner) scanComment() {
// initial '/' already consumed; S.ch == '/' || S.ch == '*'
offs := S.offset - 1 // position of initial '/'
- col := S.column - 1
- pos := token.Position{S.filename, S.offset - 1, S.line, S.column - 1}
if S.ch == '/' {
//-style comment
@@ -187,7 +202,7 @@ func (S *Scanner) scanComment() {
for S.ch != '\n' && S.ch >= 0 {
S.next()
}
- if col == 1 {
+ if offs == S.lineOffset {
// comment starts at the beginning of the current line
S.interpretLineComment(S.src[offs:S.offset])
}
@@ -205,24 +220,20 @@ func (S *Scanner) scanComment() {
}
}
- S.errorAt(pos, "comment not terminated")
+ S.error(offs, "comment not terminated")
}
func (S *Scanner) findLineEnd() bool {
// initial '/' already consumed
- defer func(line, col, offs int) {
+ defer func(offs int) {
// reset scanner state to where it was upon calling findLineEnd
- // (we don't scan //line comments and ignore errors thus
- // S.filename and S.ErrorCount don't change)
- S.line = line
- S.column = col
S.ch = '/'
S.offset = offs
S.rdOffset = offs + 1
S.next() // consume initial '/' again
- }(S.line, S.column-1, S.offset-1)
+ }(S.offset - 1)
// read ahead until a newline, EOF, or non-comment token is found
for S.ch == '/' || S.ch == '*' {
@@ -309,7 +320,7 @@ func (S *Scanner) scanNumber(seenDecimalPoint bool) token.Token {
if S.ch == '0' {
// int or float
- pos := token.Position{S.filename, S.offset, S.line, S.column}
+ offs := S.offset
S.next()
if S.ch == 'x' || S.ch == 'X' {
// hexadecimal int
@@ -329,7 +340,7 @@ func (S *Scanner) scanNumber(seenDecimalPoint bool) token.Token {
}
// octal int
if seenDecimalDigit {
- S.errorAt(pos, "illegal octal number")
+ S.error(offs, "illegal octal number")
}
}
goto exit
@@ -366,7 +377,7 @@ exit:
func (S *Scanner) scanEscape(quote int) {
- pos := token.Position{S.filename, S.offset, S.line, S.column}
+ offs := S.offset
var i, base, max uint32
switch S.ch {
@@ -386,7 +397,7 @@ func (S *Scanner) scanEscape(quote int) {
i, base, max = 8, 16, unicode.MaxRune
default:
S.next() // always make progress
- S.errorAt(pos, "unknown escape sequence")
+ S.error(offs, "unknown escape sequence")
return
}
@@ -394,7 +405,7 @@ func (S *Scanner) scanEscape(quote int) {
for ; i > 0 && S.ch != quote && S.ch >= 0; i-- {
d := uint32(digitVal(S.ch))
if d >= base {
- S.error("illegal character in escape sequence")
+ S.error(S.offset, "illegal character in escape sequence")
break
}
x = x*base + d
@@ -405,14 +416,14 @@ func (S *Scanner) scanEscape(quote int) {
S.next()
}
if x > max || 0xd800 <= x && x < 0xe000 {
- S.errorAt(pos, "escape sequence is invalid Unicode code point")
+ S.error(offs, "escape sequence is invalid Unicode code point")
}
}
func (S *Scanner) scanChar() {
// '\'' opening already consumed
- pos := token.Position{S.filename, S.offset - 1, S.line, S.column - 1}
+ offs := S.offset - 1
n := 0
for S.ch != '\'' {
@@ -420,7 +431,7 @@ func (S *Scanner) scanChar() {
n++
S.next()
if ch == '\n' || ch < 0 {
- S.errorAt(pos, "character literal not terminated")
+ S.error(offs, "character literal not terminated")
n = 1
break
}
@@ -432,20 +443,20 @@ func (S *Scanner) scanChar() {
S.next()
if n != 1 {
- S.errorAt(pos, "illegal character literal")
+ S.error(offs, "illegal character literal")
}
}
func (S *Scanner) scanString() {
// '"' opening already consumed
- pos := token.Position{S.filename, S.offset - 1, S.line, S.column - 1}
+ offs := S.offset - 1
for S.ch != '"' {
ch := S.ch
S.next()
if ch == '\n' || ch < 0 {
- S.errorAt(pos, "string not terminated")
+ S.error(offs, "string not terminated")
break
}
if ch == '\\' {
@@ -459,13 +470,13 @@ func (S *Scanner) scanString() {
func (S *Scanner) scanRawString() {
// '`' opening already consumed
- pos := token.Position{S.filename, S.offset - 1, S.line, S.column - 1}
+ offs := S.offset - 1
for S.ch != '`' {
ch := S.ch
S.next()
if ch < 0 {
- S.errorAt(pos, "string not terminated")
+ S.error(offs, "string not terminated")
break
}
}
@@ -544,14 +555,18 @@ var newline = []byte{'\n'}
// must check the scanner's ErrorCount or the number of calls
// of the error handler, if there was one installed.
//
-func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) {
+// Scan adds line information to the file passed to Init.
+// Token positions are relative to that file and thus
+// relative to the file set that the file belongs to.
+//
+func (S *Scanner) Scan() (token.Pos, token.Token, []byte) {
scanAgain:
S.skipWhitespace()
// current token start
insertSemi := false
- pos, tok = token.Position{S.filename, S.offset, S.line, S.column}, token.ILLEGAL
offs := S.offset
+ tok := token.ILLEGAL
// determine token value
switch ch := S.ch; {
@@ -570,7 +585,7 @@ scanAgain:
case -1:
if S.insertSemi {
S.insertSemi = false // EOF consumed
- return pos, token.SEMICOLON, newline
+ return S.file.Pos(offs), token.SEMICOLON, newline
}
tok = token.EOF
case '\n':
@@ -578,7 +593,7 @@ scanAgain:
// set in the first place and exited early
// from S.skipWhitespace()
S.insertSemi = false // newline consumed
- return pos, token.SEMICOLON, newline
+ return S.file.Pos(offs), token.SEMICOLON, newline
case '"':
insertSemi = true
tok = token.STRING
@@ -640,17 +655,13 @@ scanAgain:
case '/':
if S.ch == '/' || S.ch == '*' {
// comment
- line := S.line
- col := S.column - 1 // beginning of comment
if S.insertSemi && S.findLineEnd() {
// reset position to the beginning of the comment
- S.line = line
- S.column = col
S.ch = '/'
S.offset = offs
S.rdOffset = offs + 1
S.insertSemi = false // newline consumed
- return pos, token.SEMICOLON, newline
+ return S.file.Pos(offs), token.SEMICOLON, newline
}
S.scanComment()
if S.mode&ScanComments == 0 {
@@ -690,7 +701,7 @@ scanAgain:
tok = S.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR)
default:
if S.mode&AllowIllegalChars == 0 {
- S.errorAt(pos, "illegal character "+charString(ch))
+ S.error(offs, "illegal character "+charString(ch))
}
insertSemi = S.insertSemi // preserve insertSemi info
}
@@ -699,21 +710,5 @@ scanAgain:
if S.mode&InsertSemis != 0 {
S.insertSemi = insertSemi
}
- return pos, tok, S.src[offs:S.offset]
-}
-
-
-// Tokenize calls a function f with the token position, token value, and token
-// text for each token in the source src. The other parameters have the same
-// meaning as for the Init function. Tokenize keeps scanning until f returns
-// false (usually when the token value is token.EOF). The result is the number
-// of errors encountered.
-//
-func Tokenize(filename string, src []byte, err ErrorHandler, mode uint, f func(pos token.Position, tok token.Token, lit []byte) bool) int {
- var s Scanner
- s.Init(filename, src, err, mode)
- for f(s.Scan()) {
- // action happens in f
- }
- return s.ErrorCount
+ return S.file.Pos(offs), tok, S.src[offs:S.offset]
}