summary refs log tree commit diff
path: root/libgo/go/scanner/scanner.go
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/scanner/scanner.go')
-rw-r--r-- libgo/go/scanner/scanner.go 35
1 files changed, 5 insertions, 30 deletions
diff --git a/libgo/go/scanner/scanner.go b/libgo/go/scanner/scanner.go
index e79d392f70c..8fbcb9c1155 100644
--- a/libgo/go/scanner/scanner.go
+++ b/libgo/go/scanner/scanner.go
@@ -34,7 +34,6 @@ import (
"utf8"
)
-
// TODO(gri): Consider changing this to use the new (token) Position package.
// A source position is represented by a Position value.
@@ -46,11 +45,9 @@ type Position struct {
Column int // column number, starting at 1 (character count per line)
}
-
// IsValid reports whether the position is valid, that is, whether its
// line number is greater than zero.
func (pos *Position) IsValid() bool {
	return 0 < pos.Line
}
-
func (pos Position) String() string {
s := pos.Filename
if pos.IsValid() {
@@ -65,7 +62,6 @@ func (pos Position) String() string {
return s
}
-
// Predefined mode bits to control recognition of tokens. For instance,
// to configure a Scanner such that it only recognizes (Go) identifiers,
// integers, and skips comments, set the Scanner's Mode field to:
@@ -84,7 +80,6 @@ const (
GoTokens = ScanIdents | ScanFloats | ScanChars | ScanStrings | ScanRawStrings | ScanComments | SkipComments
)
-
// The result of Scan is one of the following tokens or a Unicode character.
const (
EOF = -(iota + 1)
@@ -98,7 +93,6 @@ const (
skipComment
)
-
var tokenString = map[int]string{
EOF: "EOF",
Ident: "Ident",
@@ -110,7 +104,6 @@ var tokenString = map[int]string{
Comment: "Comment",
}
-
// TokenString returns a (visible) string for a token or Unicode character.
func TokenString(tok int) string {
if s, found := tokenString[tok]; found {
@@ -119,12 +112,10 @@ func TokenString(tok int) string {
return fmt.Sprintf("%q", string(tok))
}
-
// GoWhitespace is the default value for the Scanner's Whitespace field.
// Its value selects Go's white space characters.
const GoWhitespace = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' '
-
// bufLen is a buffer length; it must be at least utf8.UTFMax.
const bufLen = 1 << 10
// A Scanner implements reading of Unicode characters and tokens from an io.Reader.
@@ -179,7 +170,6 @@ type Scanner struct {
Position
}
-
// Init initializes a Scanner with a new source and returns s.
// Error is set to nil, ErrorCount is set to 0, Mode is set to GoTokens,
// and Whitespace is set to GoWhitespace.
@@ -215,7 +205,6 @@ func (s *Scanner) Init(src io.Reader) *Scanner {
return s
}
-
// TODO(gri): The code for next() and the internal scanner state could benefit
// from a rethink. While next() is optimized for the common ASCII
// case, the "corrections" needed for proper position tracking undo
@@ -276,7 +265,12 @@ func (s *Scanner) next() int {
// uncommon case: not ASCII
ch, width = utf8.DecodeRune(s.srcBuf[s.srcPos:s.srcEnd])
if ch == utf8.RuneError && width == 1 {
+ // advance for correct error position
+ s.srcPos += width
+ s.lastCharLen = width
+ s.column++
s.error("illegal UTF-8 encoding")
+ return ch
}
}
}
@@ -300,7 +294,6 @@ func (s *Scanner) next() int {
return ch
}
-
// Next reads and returns the next Unicode character.
// It returns EOF at the end of the source. It reports
// a read error by calling s.Error, if not nil; otherwise
@@ -314,7 +307,6 @@ func (s *Scanner) Next() int {
return ch
}
-
// Peek returns the next Unicode character in the source without advancing
// the scanner. It returns EOF if the scanner's position is at the last
// character of the source.
@@ -325,7 +317,6 @@ func (s *Scanner) Peek() int {
return s.ch
}
-
func (s *Scanner) error(msg string) {
s.ErrorCount++
if s.Error != nil {
@@ -335,7 +326,6 @@ func (s *Scanner) error(msg string) {
fmt.Fprintf(os.Stderr, "%s: %s\n", s.Position, msg)
}
-
func (s *Scanner) scanIdentifier() int {
ch := s.next() // read character after first '_' or letter
for ch == '_' || unicode.IsLetter(ch) || unicode.IsDigit(ch) {
@@ -344,7 +334,6 @@ func (s *Scanner) scanIdentifier() int {
return ch
}
-
func digitVal(ch int) int {
switch {
case '0' <= ch && ch <= '9':
@@ -357,10 +346,8 @@ func digitVal(ch int) int {
return 16 // larger than any legal digit val
}
-
// isDecimal reports whether ch is one of the ASCII decimal digits
// '0' through '9'.
func isDecimal(ch int) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}
-
func (s *Scanner) scanMantissa(ch int) int {
for isDecimal(ch) {
ch = s.next()
@@ -368,7 +355,6 @@ func (s *Scanner) scanMantissa(ch int) int {
return ch
}
-
func (s *Scanner) scanFraction(ch int) int {
if ch == '.' {
ch = s.scanMantissa(s.next())
@@ -376,7 +362,6 @@ func (s *Scanner) scanFraction(ch int) int {
return ch
}
-
func (s *Scanner) scanExponent(ch int) int {
if ch == 'e' || ch == 'E' {
ch = s.next()
@@ -388,7 +373,6 @@ func (s *Scanner) scanExponent(ch int) int {
return ch
}
-
func (s *Scanner) scanNumber(ch int) (int, int) {
// isDecimal(ch)
if ch == '0' {
@@ -433,7 +417,6 @@ func (s *Scanner) scanNumber(ch int) (int, int) {
return Int, ch
}
-
func (s *Scanner) scanDigits(ch, base, n int) int {
for n > 0 && digitVal(ch) < base {
ch = s.next()
@@ -445,7 +428,6 @@ func (s *Scanner) scanDigits(ch, base, n int) int {
return ch
}
-
func (s *Scanner) scanEscape(quote int) int {
ch := s.next() // read character after '/'
switch ch {
@@ -466,7 +448,6 @@ func (s *Scanner) scanEscape(quote int) int {
return ch
}
-
func (s *Scanner) scanString(quote int) (n int) {
ch := s.next() // read character after quote
for ch != quote {
@@ -484,7 +465,6 @@ func (s *Scanner) scanString(quote int) (n int) {
return
}
-
func (s *Scanner) scanRawString() {
ch := s.next() // read character after '`'
for ch != '`' {
@@ -496,14 +476,12 @@ func (s *Scanner) scanRawString() {
}
}
-
// scanChar scans a character literal by delegating to scanString with a
// single quote as the terminating quote character. It reports an
// "illegal char literal" error unless scanString returns exactly 1
// (presumably the number of characters scanned between the quotes;
// confirm against scanString).
func (s *Scanner) scanChar() {
	if n := s.scanString('\''); n != 1 {
		s.error("illegal char literal")
	}
}
-
func (s *Scanner) scanComment(ch int) int {
// ch == '/' || ch == '*'
if ch == '/' {
@@ -532,7 +510,6 @@ func (s *Scanner) scanComment(ch int) int {
return ch
}
-
// Scan reads the next token or Unicode character from source and returns it.
// It only recognizes tokens t for which the respective Mode bit (1<<-t) is set.
// It returns EOF at the end of the source. It reports scanner errors (read and
@@ -635,7 +612,6 @@ redo:
return tok
}
-
// Pos returns the position of the character immediately after
// the character or token returned by the last call to Next or Scan.
func (s *Scanner) Pos() (pos Position) {
@@ -658,7 +634,6 @@ func (s *Scanner) Pos() (pos Position) {
return
}
-
// TokenText returns the string corresponding to the most recently scanned token.
// Valid after calling Scan().
func (s *Scanner) TokenText() string {