summary | refs | log | tree | commit | diff
path: root/libgo/go/text/scanner/scanner.go
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/text/scanner/scanner.go')
-rw-r--r-- libgo/go/text/scanner/scanner.go | 23
1 file changed, 14 insertions, 9 deletions
diff --git a/libgo/go/text/scanner/scanner.go b/libgo/go/text/scanner/scanner.go
index 565650edf9c..e0d86e343da 100644
--- a/libgo/go/text/scanner/scanner.go
+++ b/libgo/go/text/scanner/scanner.go
@@ -5,7 +5,8 @@
// Package scanner provides a scanner and tokenizer for UTF-8-encoded text.
// It takes an io.Reader providing the source, which then can be tokenized
// through repeated calls to the Scan function. For compatibility with
-// existing tools, the NUL character is not allowed.
+// existing tools, the NUL character is not allowed. If the first character
+// in the source is a UTF-8 encoded byte order mark (BOM), it is discarded.
//
// By default, a Scanner skips white space and Go comments and recognizes all
// literals as defined by the Go language specification. It may be
@@ -208,11 +209,6 @@ func (s *Scanner) Init(src io.Reader) *Scanner {
return s
}
-// TODO(gri): The code for next() and the internal scanner state could benefit
-// from a rethink. While next() is optimized for the common ASCII
-// case, the "corrections" needed for proper position tracking undo
-// some of the attempts for fast-path optimization.
-
// next reads and returns the next Unicode character. It is designed such
// that only a minimal amount of work needs to be done in the common ASCII
// case (one test to check for both ASCII and end-of-buffer, and one test
@@ -316,7 +312,11 @@ func (s *Scanner) Next() rune {
// character of the source.
func (s *Scanner) Peek() rune {
if s.ch < 0 {
+ // this code is only run for the very first character
s.ch = s.next()
+ if s.ch == '\uFEFF' {
+ s.ch = s.next() // ignore BOM
+ }
}
return s.ch
}
@@ -389,15 +389,20 @@ func (s *Scanner) scanNumber(ch rune) (rune, rune) {
if ch == 'x' || ch == 'X' {
// hexadecimal int
ch = s.next()
+ hasMantissa := false
for digitVal(ch) < 16 {
ch = s.next()
+ hasMantissa = true
+ }
+ if !hasMantissa {
+ s.error("illegal hexadecimal number")
}
} else {
// octal int or float
- seenDecimalDigit := false
+ has8or9 := false
for isDecimal(ch) {
if ch > '7' {
- seenDecimalDigit = true
+ has8or9 = true
}
ch = s.next()
}
@@ -408,7 +413,7 @@ func (s *Scanner) scanNumber(ch rune) (rune, rune) {
return Float, ch
}
// octal int
- if seenDecimalDigit {
+ if has8or9 {
s.error("illegal octal number")
}
}