summary | refs | log | tree | commit | diff
path: root/libgo/go/text/scanner/scanner.go
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/text/scanner/scanner.go')
-rw-r--r-- libgo/go/text/scanner/scanner.go 37
1 files changed, 28 insertions, 9 deletions
diff --git a/libgo/go/text/scanner/scanner.go b/libgo/go/text/scanner/scanner.go
index e0d86e343d..5199ee4fc7 100644
--- a/libgo/go/text/scanner/scanner.go
+++ b/libgo/go/text/scanner/scanner.go
@@ -11,7 +11,7 @@
// By default, a Scanner skips white space and Go comments and recognizes all
// literals as defined by the Go language specification. It may be
// customized to recognize only a subset of those literals and to recognize
-// different white space characters.
+// different identifier and white space characters.
//
// Basic usage pattern:
//
@@ -34,8 +34,6 @@ import (
"unicode/utf8"
)
-// TODO(gri): Consider changing this to use the new (token) Position package.
-
// A source position is represented by a Position value.
// A position is valid if Line > 0.
type Position struct {
@@ -68,6 +66,12 @@ func (pos Position) String() string {
//
// ScanIdents | ScanInts | SkipComments
//
+// With the exception of comments, which are skipped if SkipComments is
+// set, unrecognized tokens are not ignored. Instead, the scanner simply
+// returns the respective individual characters (or possibly sub-tokens).
+// For instance, if the mode is ScanIdents (not ScanStrings), the string
+// "foo" is scanned as the token sequence '"' Ident '"'.
+//
const (
ScanIdents = 1 << -Ident
ScanInts = 1 << -Int
@@ -164,6 +168,13 @@ type Scanner struct {
// for values ch > ' '). The field may be changed at any time.
Whitespace uint64
+ // IsIdentRune is a predicate controlling the characters accepted
+ // as the ith rune in an identifier. The set of valid characters
+ // must not intersect with the set of white space characters.
+ // If no IsIdentRune function is set, regular Go identifiers are
+ // accepted instead. The field may be changed at any time.
+ IsIdentRune func(ch rune, i int) bool
+
// Start position of most recently scanned token; set by Scan.
// Calling Init or Next invalidates the position (Line == 0).
// The Filename field is always left untouched by the Scanner.
@@ -240,6 +251,9 @@ func (s *Scanner) next() rune {
s.srcEnd = i + n
s.srcBuf[s.srcEnd] = utf8.RuneSelf // sentinel
if err != nil {
+ if err != io.EOF {
+ s.error(err.Error())
+ }
if s.srcEnd == 0 {
if s.lastCharLen > 0 {
// previous character was not EOF
@@ -248,9 +262,6 @@ func (s *Scanner) next() rune {
s.lastCharLen = 0
return EOF
}
- if err != io.EOF {
- s.error(err.Error())
- }
// If err == EOF, we won't be getting more
// bytes; break to avoid infinite loop. If
// err is something else, we don't know if
@@ -334,9 +345,17 @@ func (s *Scanner) error(msg string) {
fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
}
+func (s *Scanner) isIdentRune(ch rune, i int) bool {
+ if s.IsIdentRune != nil {
+ return s.IsIdentRune(ch, i)
+ }
+ return ch == '_' || unicode.IsLetter(ch) || unicode.IsDigit(ch) && i > 0
+}
+
func (s *Scanner) scanIdentifier() rune {
- ch := s.next() // read character after first '_' or letter
- for ch == '_' || unicode.IsLetter(ch) || unicode.IsDigit(ch) {
+ // we know the zeroth rune is OK; start scanning at the next one
+ ch := s.next()
+ for i := 1; s.isIdentRune(ch, i); i++ {
ch = s.next()
}
return ch
@@ -563,7 +582,7 @@ redo:
// determine token value
tok := ch
switch {
- case unicode.IsLetter(ch) || ch == '_':
+ case s.isIdentRune(ch, 0):
if s.Mode&ScanIdents != 0 {
tok = Ident
ch = s.scanIdentifier()