diff options
Diffstat (limited to 'libgo/go/go/scanner/scanner.go')
-rw-r--r-- | libgo/go/go/scanner/scanner.go | 165 |
1 files changed, 80 insertions, 85 deletions
diff --git a/libgo/go/go/scanner/scanner.go b/libgo/go/go/scanner/scanner.go index 64ff127750d..8c3205230e8 100644 --- a/libgo/go/go/scanner/scanner.go +++ b/libgo/go/go/scanner/scanner.go @@ -4,13 +4,25 @@ // A scanner for Go source text. Takes a []byte as source which can // then be tokenized through repeated calls to the Scan function. -// For a sample use of a scanner, see the implementation of Tokenize. +// Typical use: +// +// var s Scanner +// fset := token.NewFileSet() // position information is relative to fset +// s.Init(fset.AddFile(filename, fset.Base(), len(src)), src, nil /* no error handler */, 0) +// for { +// pos, tok, lit := s.Scan() +// if tok == token.EOF { +// break +// } +// // do something here with pos, tok, and lit +// } // package scanner import ( "bytes" "go/token" + "path" "strconv" "unicode" "utf8" @@ -19,23 +31,21 @@ import ( // A Scanner holds the scanner's internal state while processing // a given text. It can be allocated as part of another data -// structure but must be initialized via Init before use. For -// a sample use, see the implementation of Tokenize. +// structure but must be initialized via Init before use. // type Scanner struct { // immutable state + file *token.File // source file handle + dir string // directory portion of file.Name() src []byte // source err ErrorHandler // error reporting; or nil mode uint // scanning mode // scanning state - filename string // current filename; may change via //line filename:line comment - line int // current line - column int // current column - ch int // current character offset int // character offset rdOffset int // reading offset (position after current character) + lineOffset int // current line offset insertSemi bool // insert a semicolon before next newline // public state - ok to modify @@ -47,22 +57,21 @@ type Scanner struct { // S.ch < 0 means end-of-file. 
// func (S *Scanner) next() { - S.column++ if S.rdOffset < len(S.src) { S.offset = S.rdOffset if S.ch == '\n' { - S.line++ - S.column = 1 + S.lineOffset = S.offset + S.file.AddLine(S.offset) } r, w := int(S.src[S.rdOffset]), 1 switch { case r == 0: - S.error("illegal character NUL") + S.error(S.offset, "illegal character NUL") case r >= 0x80: // not ASCII r, w = utf8.DecodeRune(S.src[S.rdOffset:]) if r == utf8.RuneError && w == 1 { - S.error("illegal UTF-8 encoding") + S.error(S.offset, "illegal UTF-8 encoding") } } S.rdOffset += w @@ -70,7 +79,8 @@ func (S *Scanner) next() { } else { S.offset = len(S.src) if S.ch == '\n' { - S.column = 1 + S.lineOffset = S.offset + S.file.AddLine(S.offset) } S.ch = -1 // eof } @@ -86,28 +96,36 @@ const ( InsertSemis // automatically insert semicolons ) - -// Init prepares the scanner S to tokenize the text src. Calls to Scan -// will use the error handler err if they encounter a syntax error and -// err is not nil. Also, for each error encountered, the Scanner field -// ErrorCount is incremented by one. The filename parameter is used as -// filename in the token.Position returned by Scan for each token. The -// mode parameter determines how comments and illegal characters are -// handled. +// Init prepares the scanner S to tokenize the text src by setting the +// scanner at the beginning of src. The scanner uses the file set file +// for position information and it adds line information for each line. +// It is ok to re-use the same file when re-scanning the same file as +// line information which is already present is ignored. Init causes a +// panic if the file size does not match the src size. +// +// Calls to Scan will use the error handler err if they encounter a +// syntax error and err is not nil. Also, for each error encountered, +// the Scanner field ErrorCount is incremented by one. The mode parameter +// determines how comments, illegal characters, and semicolons are handled. 
+// +// Note that Init may call err if there is an error in the first character +// of the file. // -func (S *Scanner) Init(filename string, src []byte, err ErrorHandler, mode uint) { +func (S *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode uint) { // Explicitly initialize all fields since a scanner may be reused. + if file.Size() != len(src) { + panic("file size does not match src len") + } + S.file = file + S.dir, _ = path.Split(file.Name()) S.src = src S.err = err S.mode = mode - S.filename = filename - S.line = 1 - S.column = 0 - S.ch = ' ' S.offset = 0 S.rdOffset = 0 + S.lineOffset = 0 S.insertSemi = false S.ErrorCount = 0 @@ -145,14 +163,9 @@ func charString(ch int) string { } -func (S *Scanner) error(msg string) { - S.errorAt(token.Position{S.filename, S.offset, S.line, S.column}, msg) -} - - -func (S *Scanner) errorAt(pos token.Position, msg string) { +func (S *Scanner) error(offs int, msg string) { if S.err != nil { - S.err.Error(pos, msg) + S.err.Error(S.file.Position(S.file.Pos(offs)), msg) } S.ErrorCount++ } @@ -166,9 +179,13 @@ func (S *Scanner) interpretLineComment(text []byte) { if i := bytes.Index(text, []byte{':'}); i > 0 { if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 { // valid //line filename:line comment; + filename := path.Clean(string(text[len(prefix):i])) + if filename[0] != '/' { + // make filename relative to current directory + filename = path.Join(S.dir, filename) + } // update scanner position - S.filename = string(text[len(prefix):i]) - S.line = line - 1 // -1 since the '\n' has not been consumed yet + S.file.AddLineInfo(S.lineOffset, filename, line-1) // -1 since comment applies to next line } } } @@ -178,8 +195,6 @@ func (S *Scanner) interpretLineComment(text []byte) { func (S *Scanner) scanComment() { // initial '/' already consumed; S.ch == '/' || S.ch == '*' offs := S.offset - 1 // position of initial '/' - col := S.column - 1 - pos := token.Position{S.filename, S.offset - 1, S.line, 
S.column - 1} if S.ch == '/' { //-style comment @@ -187,7 +202,7 @@ func (S *Scanner) scanComment() { for S.ch != '\n' && S.ch >= 0 { S.next() } - if col == 1 { + if offs == S.lineOffset { // comment starts at the beginning of the current line S.interpretLineComment(S.src[offs:S.offset]) } @@ -205,24 +220,20 @@ func (S *Scanner) scanComment() { } } - S.errorAt(pos, "comment not terminated") + S.error(offs, "comment not terminated") } func (S *Scanner) findLineEnd() bool { // initial '/' already consumed - defer func(line, col, offs int) { + defer func(offs int) { // reset scanner state to where it was upon calling findLineEnd - // (we don't scan //line comments and ignore errors thus - // S.filename and S.ErrorCount don't change) - S.line = line - S.column = col S.ch = '/' S.offset = offs S.rdOffset = offs + 1 S.next() // consume initial '/' again - }(S.line, S.column-1, S.offset-1) + }(S.offset - 1) // read ahead until a newline, EOF, or non-comment token is found for S.ch == '/' || S.ch == '*' { @@ -309,7 +320,7 @@ func (S *Scanner) scanNumber(seenDecimalPoint bool) token.Token { if S.ch == '0' { // int or float - pos := token.Position{S.filename, S.offset, S.line, S.column} + offs := S.offset S.next() if S.ch == 'x' || S.ch == 'X' { // hexadecimal int @@ -329,7 +340,7 @@ func (S *Scanner) scanNumber(seenDecimalPoint bool) token.Token { } // octal int if seenDecimalDigit { - S.errorAt(pos, "illegal octal number") + S.error(offs, "illegal octal number") } } goto exit @@ -366,7 +377,7 @@ exit: func (S *Scanner) scanEscape(quote int) { - pos := token.Position{S.filename, S.offset, S.line, S.column} + offs := S.offset var i, base, max uint32 switch S.ch { @@ -386,7 +397,7 @@ func (S *Scanner) scanEscape(quote int) { i, base, max = 8, 16, unicode.MaxRune default: S.next() // always make progress - S.errorAt(pos, "unknown escape sequence") + S.error(offs, "unknown escape sequence") return } @@ -394,7 +405,7 @@ func (S *Scanner) scanEscape(quote int) { for ; i > 0 && 
S.ch != quote && S.ch >= 0; i-- { d := uint32(digitVal(S.ch)) if d >= base { - S.error("illegal character in escape sequence") + S.error(S.offset, "illegal character in escape sequence") break } x = x*base + d @@ -405,14 +416,14 @@ func (S *Scanner) scanEscape(quote int) { S.next() } if x > max || 0xd800 <= x && x < 0xe000 { - S.errorAt(pos, "escape sequence is invalid Unicode code point") + S.error(offs, "escape sequence is invalid Unicode code point") } } func (S *Scanner) scanChar() { // '\'' opening already consumed - pos := token.Position{S.filename, S.offset - 1, S.line, S.column - 1} + offs := S.offset - 1 n := 0 for S.ch != '\'' { @@ -420,7 +431,7 @@ func (S *Scanner) scanChar() { n++ S.next() if ch == '\n' || ch < 0 { - S.errorAt(pos, "character literal not terminated") + S.error(offs, "character literal not terminated") n = 1 break } @@ -432,20 +443,20 @@ func (S *Scanner) scanChar() { S.next() if n != 1 { - S.errorAt(pos, "illegal character literal") + S.error(offs, "illegal character literal") } } func (S *Scanner) scanString() { // '"' opening already consumed - pos := token.Position{S.filename, S.offset - 1, S.line, S.column - 1} + offs := S.offset - 1 for S.ch != '"' { ch := S.ch S.next() if ch == '\n' || ch < 0 { - S.errorAt(pos, "string not terminated") + S.error(offs, "string not terminated") break } if ch == '\\' { @@ -459,13 +470,13 @@ func (S *Scanner) scanString() { func (S *Scanner) scanRawString() { // '`' opening already consumed - pos := token.Position{S.filename, S.offset - 1, S.line, S.column - 1} + offs := S.offset - 1 for S.ch != '`' { ch := S.ch S.next() if ch < 0 { - S.errorAt(pos, "string not terminated") + S.error(offs, "string not terminated") break } } @@ -544,14 +555,18 @@ var newline = []byte{'\n'} // must check the scanner's ErrorCount or the number of calls // of the error handler, if there was one installed. 
// -func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) { +// Scan adds line information to the file added to the file +// set with Init. Token positions are relative to that file +// and thus relative to the file set. +// +func (S *Scanner) Scan() (token.Pos, token.Token, []byte) { scanAgain: S.skipWhitespace() // current token start insertSemi := false - pos, tok = token.Position{S.filename, S.offset, S.line, S.column}, token.ILLEGAL offs := S.offset + tok := token.ILLEGAL // determine token value switch ch := S.ch; { @@ -570,7 +585,7 @@ scanAgain: case -1: if S.insertSemi { S.insertSemi = false // EOF consumed - return pos, token.SEMICOLON, newline + return S.file.Pos(offs), token.SEMICOLON, newline } tok = token.EOF case '\n': @@ -578,7 +593,7 @@ scanAgain: // set in the first place and exited early // from S.skipWhitespace() S.insertSemi = false // newline consumed - return pos, token.SEMICOLON, newline + return S.file.Pos(offs), token.SEMICOLON, newline case '"': insertSemi = true tok = token.STRING @@ -640,17 +655,13 @@ scanAgain: case '/': if S.ch == '/' || S.ch == '*' { // comment - line := S.line - col := S.column - 1 // beginning of comment if S.insertSemi && S.findLineEnd() { // reset position to the beginning of the comment - S.line = line - S.column = col S.ch = '/' S.offset = offs S.rdOffset = offs + 1 S.insertSemi = false // newline consumed - return pos, token.SEMICOLON, newline + return S.file.Pos(offs), token.SEMICOLON, newline } S.scanComment() if S.mode&ScanComments == 0 { @@ -690,7 +701,7 @@ scanAgain: tok = S.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR) default: if S.mode&AllowIllegalChars == 0 { - S.errorAt(pos, "illegal character "+charString(ch)) + S.error(offs, "illegal character "+charString(ch)) } insertSemi = S.insertSemi // preserve insertSemi info } @@ -699,21 +710,5 @@ scanAgain: if S.mode&InsertSemis != 0 { S.insertSemi = insertSemi } - return pos, tok, S.src[offs:S.offset] -} - - -// Tokenize calls a 
function f with the token position, token value, and token -// text for each token in the source src. The other parameters have the same -// meaning as for the Init function. Tokenize keeps scanning until f returns -// false (usually when the token value is token.EOF). The result is the number -// of errors encountered. -// -func Tokenize(filename string, src []byte, err ErrorHandler, mode uint, f func(pos token.Position, tok token.Token, lit []byte) bool) int { - var s Scanner - s.Init(filename, src, err, mode) - for f(s.Scan()) { - // action happens in f - } - return s.ErrorCount + return S.file.Pos(offs), tok, S.src[offs:S.offset] } |