diff options
Diffstat (limited to 'libgo/go/text/scanner/scanner.go')
-rw-r--r-- | libgo/go/text/scanner/scanner.go | 23 |
1 files changed, 14 insertions, 9 deletions
diff --git a/libgo/go/text/scanner/scanner.go b/libgo/go/text/scanner/scanner.go
index 565650edf9c..e0d86e343da 100644
--- a/libgo/go/text/scanner/scanner.go
+++ b/libgo/go/text/scanner/scanner.go
@@ -5,7 +5,8 @@
 // Package scanner provides a scanner and tokenizer for UTF-8-encoded text.
 // It takes an io.Reader providing the source, which then can be tokenized
 // through repeated calls to the Scan function. For compatibility with
-// existing tools, the NUL character is not allowed.
+// existing tools, the NUL character is not allowed. If the first character
+// in the source is a UTF-8 encoded byte order mark (BOM), it is discarded.
 //
 // By default, a Scanner skips white space and Go comments and recognizes all
 // literals as defined by the Go language specification. It may be
@@ -208,11 +209,6 @@ func (s *Scanner) Init(src io.Reader) *Scanner {
 	return s
 }
 
-// TODO(gri): The code for next() and the internal scanner state could benefit
-// from a rethink. While next() is optimized for the common ASCII
-// case, the "corrections" needed for proper position tracking undo
-// some of the attempts for fast-path optimization.
-
 // next reads and returns the next Unicode character. It is designed such
 // that only a minimal amount of work needs to be done in the common ASCII
 // case (one test to check for both ASCII and end-of-buffer, and one test
@@ -316,7 +312,11 @@ func (s *Scanner) Next() rune {
 // character of the source.
 func (s *Scanner) Peek() rune {
 	if s.ch < 0 {
+		// this code is only run for the very first character
 		s.ch = s.next()
+		if s.ch == '\uFEFF' {
+			s.ch = s.next() // ignore BOM
+		}
 	}
 	return s.ch
 }
@@ -389,15 +389,20 @@ func (s *Scanner) scanNumber(ch rune) (rune, rune) {
 		if ch == 'x' || ch == 'X' {
 			// hexadecimal int
 			ch = s.next()
+			hasMantissa := false
 			for digitVal(ch) < 16 {
 				ch = s.next()
+				hasMantissa = true
+			}
+			if !hasMantissa {
+				s.error("illegal hexadecimal number")
 			}
 		} else {
 			// octal int or float
-			seenDecimalDigit := false
+			has8or9 := false
 			for isDecimal(ch) {
 				if ch > '7' {
-					seenDecimalDigit = true
+					has8or9 = true
 				}
 				ch = s.next()
 			}
@@ -408,7 +413,7 @@ func (s *Scanner) scanNumber(ch rune) (rune, rune) {
 				return Float, ch
 			}
 			// octal int
-			if seenDecimalDigit {
+			if has8or9 {
 				s.error("illegal octal number")
 			}
 		}