summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndy Wingo <wingo@pobox.com>2016-05-07 12:41:07 +0200
committerAndy Wingo <wingo@pobox.com>2016-05-07 12:41:07 +0200
commitd77b50476a340338615aa9f9952a2001f5f139f1 (patch)
tree0548bd81e571b41232aabcea3167d81917c050f7
parent13f2128587d5e3e99a99ea49b16855ffb1ab9399 (diff)
downloadguile-d77b50476a340338615aa9f9952a2001f5f139f1.tar.gz
Speed refactors to Scheme %peek-char
* module/ice-9/ports.scm (peek-bytes): New helper. (peek-byte): Use peek-bytes helper. (decoding-error): Don't inline; no need. (decode-utf8, bad-utf8-len): New helpers. (peek-char-and-len/utf8): Use new helpers. (peek-char-and-len): No fast paths, and not inline. Peek-char has its own fast path. (%peek-char): Use helpers to make fast path.
-rw-r--r--module/ice-9/ports.scm238
1 files changed, 143 insertions, 95 deletions
diff --git a/module/ice-9/ports.scm b/module/ice-9/ports.scm
index 27bcdd855..a222e834a 100644
--- a/module/ice-9/ports.scm
+++ b/module/ice-9/ports.scm
@@ -286,94 +286,134 @@ interpret its input and output."
(lp buffered)
(values buf buffered)))))))))))))))
-(define-inlinable (peek-byte port)
+(define-inlinable (peek-bytes port count kfast kslow)
(let* ((buf (port-read-buffer port))
- (cur (port-buffer-cur buf)))
- (if (< cur (port-buffer-end buf))
- (bytevector-u8-ref (port-buffer-bytevector buf) cur)
- (call-with-values (lambda () (fill-input port))
+ (cur (port-buffer-cur buf))
+ (buffered (- (port-buffer-end buf) cur)))
+ (if (<= count buffered)
+ (kfast buf (port-buffer-bytevector buf) cur buffered)
+ (call-with-values (lambda () (fill-input port count))
(lambda (buf buffered)
- (if (zero? buffered)
- the-eof-object
- (bytevector-u8-ref (port-buffer-bytevector buf)
- (port-buffer-cur buf))))))))
-
-;; GNU/Linux definition; fixme?
-(define-syntax EILSEQ (identifier-syntax 84))
-
-(define-syntax-rule (decoding-error subr port)
+ (kslow buf (port-buffer-bytevector buf) (port-buffer-cur buf)
+ buffered))))))
+
+(define (peek-byte port)
+ (peek-bytes port 1
+ (lambda (buf bv cur buffered)
+ (bytevector-u8-ref bv cur))
+ (lambda (buf bv cur buffered)
+ (and (> buffered 0)
+ (bytevector-u8-ref bv cur)))))
+
+(define (decoding-error subr port)
+ ;; GNU/Linux definition; fixme?
+ (define EILSEQ 84)
(throw 'decoding-error subr "input decoding error" EILSEQ port))
+(define-inlinable (decode-utf8 bv start avail u8_0 kt kf)
+ (cond
+ ((< u8_0 #x80)
+ (kt (integer->char u8_0) 1))
+ ((and (<= #xc2 u8_0 #xdf) (<= 2 avail))
+ (let ((u8_1 (bytevector-u8-ref bv (1+ start))))
+ (if (= (logand u8_1 #xc0) #x80)
+ (kt (integer->char
+ (logior (ash (logand u8_0 #x1f) 6)
+ (logand u8_1 #x3f)))
+ 2)
+ (kf))))
+ ((and (= (logand u8_0 #xf0) #xe0) (<= 3 avail))
+ (let ((u8_1 (bytevector-u8-ref bv (+ start 1)))
+ (u8_2 (bytevector-u8-ref bv (+ start 2))))
+ (if (and (= (logand u8_1 #xc0) #x80)
+ (= (logand u8_2 #xc0) #x80)
+ (case u8_0
+ ((#xe0) (>= u8_1 #xa0))
+ ((#xed) (>= u8_1 #x9f))
+ (else #t)))
+ (kt (integer->char
+ (logior (ash (logand u8_0 #x0f) 12)
+ (ash (logand u8_1 #x3f) 6)
+ (logand u8_2 #x3f)))
+ 3)
+ (kf))))
+ ((and (<= #xf0 u8_0 #xf4) (<= 4 avail))
+ (let ((u8_1 (bytevector-u8-ref bv (+ start 1)))
+ (u8_2 (bytevector-u8-ref bv (+ start 2)))
+ (u8_3 (bytevector-u8-ref bv (+ start 3))))
+ (if (and (= (logand u8_1 #xc0) #x80)
+ (= (logand u8_2 #xc0) #x80)
+ (= (logand u8_3 #xc0) #x80)
+ (case u8_0
+ ((#xf0) (>= u8_1 #x90))
+ ((#xf4) (>= u8_1 #x8f))
+ (else #t)))
+ (kt (integer->char
+ (logior (ash (logand u8_0 #x07) 18)
+ (ash (logand u8_1 #x3f) 12)
+ (ash (logand u8_2 #x3f) 6)
+ (logand u8_3 #x3f)))
+ 4)
+ (kf))))
+ (else (kf))))
+
+(define (bad-utf8-len bv cur buffering first-byte)
+ (define (ref n)
+ (bytevector-u8-ref bv (+ cur 1)))
+ (cond
+ ((< first-byte #x80) 0)
+ ((<= #xc2 first-byte #xdf)
+ (cond
+ ((< buffering 2) 1)
+ ((not (= (logand (ref 1) #xc0) #x80)) 1)
+ (else 0)))
+ ((= (logand first-byte #xf0) #xe0)
+ (cond
+ ((< buffering 2) 1)
+ ((not (= (logand (ref 1) #xc0) #x80)) 1)
+ ((and (eq? first-byte #xe0) (< (ref 1) #xa0)) 1)
+ ((and (eq? first-byte #xed) (< (ref 1) #x9f)) 1)
+ ((< buffering 3) 2)
+ ((not (= (logand (ref 2) #xc0) #x80)) 2)
+ (else 0)))
+ ((<= #xf0 first-byte #xf4)
+ (cond
+ ((< buffering 2) 1)
+ ((not (= (logand (ref 1) #xc0) #x80)) 1)
+ ((and (eq? first-byte #xf0) (< (ref 1) #x90)) 1)
+ ((and (eq? first-byte #xf4) (< (ref 1) #x8f)) 1)
+ ((< buffering 3) 2)
+ ((not (= (logand (ref 2) #xc0) #x80)) 2)
+ ((< buffering 4) 3)
+ ((not (= (logand (ref 3) #xc0) #x80)) 3)
+ (else 0)))
+ (else 1)))
+
(define (peek-char-and-len/utf8 port first-byte)
(define (bad-utf8 len)
(if (eq? (port-conversion-strategy port) 'substitute)
(values #\? len)
(decoding-error "peek-char" port)))
- (cond
- ((< first-byte #x80)
- (values (integer->char first-byte) 1))
- ((<= #xc2 first-byte #xdf)
- (call-with-values (lambda () (fill-input port 2))
- (lambda (buf buffering)
- (let ((bv (port-buffer-bytevector buf))
- (cur (port-buffer-cur buf)))
- (define (ref n)
- (bytevector-u8-ref bv (+ cur 1)))
- (when (or (< buffering 2)
- (not (= (logand (ref 1) #xc0) #x80)))
- (bad-utf8 1))
- (values (integer->char
- (logior (ash (logand first-byte #x1f) 6)
- (logand (ref 1) #x3f)))
- 2)))))
- ((= (logand first-byte #xf0) #xe0)
- (call-with-values (lambda () (fill-input port 3))
- (lambda (buf buffering)
- (let ((bv (port-buffer-bytevector buf))
- (cur (port-buffer-cur buf)))
- (define (ref n)
- (bytevector-u8-ref bv (+ cur 1)))
- (when (or (< buffering 2)
- (not (= (logand (ref 1) #xc0) #x80))
- (and (eq? first-byte #xe0) (< (ref 1) #xa0))
- (and (eq? first-byte #xed) (< (ref 1) #x9f)))
- (bad-utf8 1))
- (when (or (< buffering 3)
- (not (= (logand (ref 2) #xc0) #x80)))
- (bad-utf8 2))
- (values (integer->char
- (logior (ash (logand first-byte #x0f) 12)
- (ash (logand (ref 1) #x3f) 6)
- (logand (ref 2) #x3f)))
- 3)))))
- ((<= #xf0 first-byte #xf4)
- (call-with-values (lambda () (fill-input port 4))
- (lambda (buf buffering)
- (let ((bv (port-buffer-bytevector buf))
- (cur (port-buffer-cur buf)))
- (define (ref n)
- (bytevector-u8-ref bv (+ cur 1)))
- (when (or (< buffering 2)
- (not (= (logand (ref 1) #xc0) #x80))
- (and (eq? first-byte #xf0) (< (ref 1) #x90))
- (and (eq? first-byte #xf4) (< (ref 1) #x8f)))
- (bad-utf8 1))
- (when (or (< buffering 3)
- (not (= (logand (ref 2) #xc0) #x80)))
- (bad-utf8 2))
- (when (or (< buffering 4)
- (not (= (logand (ref 3) #xc0) #x80)))
- (bad-utf8 3))
- (values (integer->char
- (logior (ash (logand first-byte #x07) 18)
- (ash (logand (ref 1) #x3f) 12)
- (ash (logand (ref 2) #x3f) 6)
- (logand (ref 3) #x3f)))
- 4)))))
- (else
- (bad-utf8 1))))
-
-(define-inlinable (peek-char-and-len/iso-8859-1 port first-byte)
+ (if (< first-byte #x80)
+ (values (integer->char first-byte) 1)
+ (call-with-values (lambda ()
+ (fill-input port
+ (cond
+ ((<= #xc2 first-byte #xdf) 2)
+ ((= (logand first-byte #xf0) #xe0) 3)
+ (else 4))))
+ (lambda (buf buffering)
+ (let* ((bv (port-buffer-bytevector buf))
+ (cur (port-buffer-cur buf)))
+ (define (bad-utf8)
+ (let ((len (bad-utf8-len bv cur buffering first-byte)))
+ (when (zero? len) (error "internal error"))
+ (if (eq? (port-conversion-strategy port) 'substitute)
+ (values #\? len)
+ (decoding-error "peek-char" port))))
+ (decode-utf8 bv cur buffering first-byte values bad-utf8))))))
+
+(define (peek-char-and-len/iso-8859-1 port first-byte)
(values (integer->char first-byte) 1))
(define (peek-char-and-len/iconv port first-byte)
@@ -398,25 +438,33 @@ interpret its input and output."
(else
(lp input-size))))))
-(define-inlinable (peek-char-and-len port)
+(define (peek-char-and-len port)
(let ((first-byte (peek-byte port)))
- (if (eq? first-byte the-eof-object)
- (values first-byte 0)
- (let ((first-byte (logand first-byte #xff)))
- (case (%port-encoding port)
- ((UTF-8)
- (if (< first-byte #x80)
- (values (integer->char first-byte) 1)
- (peek-char-and-len/utf8 port first-byte)))
- ((ISO-8859-1)
- (peek-char-and-len/iso-8859-1 port first-byte))
- (else
- (peek-char-and-len/iconv port first-byte)))))))
+ (if (not first-byte)
+ (values the-eof-object 0)
+ (case (%port-encoding port)
+ ((UTF-8)
+ (peek-char-and-len/utf8 port first-byte))
+ ((ISO-8859-1)
+ (peek-char-and-len/iso-8859-1 port first-byte))
+ (else
+ (peek-char-and-len/iconv port first-byte))))))
(define (%peek-char port)
- (call-with-values (lambda () (peek-char-and-len port))
- (lambda (char len)
- char)))
+ (define (slow-path)
+ (call-with-values (lambda () (peek-char-and-len port))
+ (lambda (char len)
+ char)))
+ (define (fast-path buf bv cur buffered)
+ (let ((u8 (bytevector-u8-ref bv cur))
+ (enc (%port-encoding port)))
+ (case enc
+ ((UTF-8) (decode-utf8 bv cur buffered u8 (lambda (char len) char)
+ slow-path))
+ ((ISO-8859-1) (integer->char u8))
+ (else (slow-path)))))
+ (peek-bytes port 1 fast-path
+ (lambda (buf bv cur buffered) (slow-path))))