diff options
author | Andy Wingo <wingo@pobox.com> | 2016-05-07 12:41:07 +0200 |
---|---|---|
committer | Andy Wingo <wingo@pobox.com> | 2016-05-07 12:41:07 +0200 |
commit | d77b50476a340338615aa9f9952a2001f5f139f1 (patch) | |
tree | 0548bd81e571b41232aabcea3167d81917c050f7 | |
parent | 13f2128587d5e3e99a99ea49b16855ffb1ab9399 (diff) | |
download | guile-d77b50476a340338615aa9f9952a2001f5f139f1.tar.gz |
Speed refactors to Scheme %peek-char
* module/ice-9/ports.scm (peek-bytes): New helper.
(peek-byte): Use peek-bytes helper.
(decoding-error): Don't inline; no need.
(decode-utf8, bad-utf8-len): New helpers.
(peek-char-and-len/utf8): Use new helpers.
(peek-char-and-len): No fast paths, and not inline. Peek-char has its
own fast path.
(%peek-char): Use helpers to make fast path.
-rw-r--r-- | module/ice-9/ports.scm | 238 |
1 files changed, 143 insertions, 95 deletions
diff --git a/module/ice-9/ports.scm b/module/ice-9/ports.scm index 27bcdd855..a222e834a 100644 --- a/module/ice-9/ports.scm +++ b/module/ice-9/ports.scm @@ -286,94 +286,134 @@ interpret its input and output." (lp buffered) (values buf buffered))))))))))))))) -(define-inlinable (peek-byte port) +(define-inlinable (peek-bytes port count kfast kslow) (let* ((buf (port-read-buffer port)) - (cur (port-buffer-cur buf))) - (if (< cur (port-buffer-end buf)) - (bytevector-u8-ref (port-buffer-bytevector buf) cur) - (call-with-values (lambda () (fill-input port)) + (cur (port-buffer-cur buf)) + (buffered (- (port-buffer-end buf) cur))) + (if (<= count buffered) + (kfast buf (port-buffer-bytevector buf) cur buffered) + (call-with-values (lambda () (fill-input port count)) (lambda (buf buffered) - (if (zero? buffered) - the-eof-object - (bytevector-u8-ref (port-buffer-bytevector buf) - (port-buffer-cur buf)))))))) - -;; GNU/Linux definition; fixme? -(define-syntax EILSEQ (identifier-syntax 84)) - -(define-syntax-rule (decoding-error subr port) + (kslow buf (port-buffer-bytevector buf) (port-buffer-cur buf) + buffered)))))) + +(define (peek-byte port) + (peek-bytes port 1 + (lambda (buf bv cur buffered) + (bytevector-u8-ref bv cur)) + (lambda (buf bv cur buffered) + (and (> buffered 0) + (bytevector-u8-ref bv cur))))) + +(define (decoding-error subr port) + ;; GNU/Linux definition; fixme? + (define EILSEQ 84) (throw 'decoding-error subr "input decoding error" EILSEQ port)) +(define-inlinable (decode-utf8 bv start avail u8_0 kt kf) + (cond + ((< u8_0 #x80) + (kt (integer->char u8_0) 1)) + ((and (<= #xc2 u8_0 #xdf) (<= 2 avail)) + (let ((u8_1 (bytevector-u8-ref bv (1+ start)))) + (if (= (logand u8_1 #xc0) #x80) + (kt (integer->char + (logior (ash (logand u8_0 #x1f) 6) + (logand u8_1 #x3f))) + 2) + (kf)))) + ((and (= (logand u8_0 #xf0) #xe0) (<= 3 avail)) + (let ((u8_1 (bytevector-u8-ref bv (+ start 1))) + (u8_2 (bytevector-u8-ref bv (+ start 2)))) + (if (and (= (logand u8_1 #xc0) #x80) + (= (logand u8_2 #xc0) #x80) + (case u8_0 + ((#xe0) (>= u8_1 #xa0)) + ((#xed) (>= u8_1 #x9f)) + (else #t))) + (kt (integer->char + (logior (ash (logand u8_0 #x0f) 12) + (ash (logand u8_1 #x3f) 6) + (logand u8_2 #x3f))) + 3) + (kf)))) + ((and (<= #xf0 u8_0 #xf4) (<= 4 avail)) + (let ((u8_1 (bytevector-u8-ref bv (+ start 1))) + (u8_2 (bytevector-u8-ref bv (+ start 2))) + (u8_3 (bytevector-u8-ref bv (+ start 3)))) + (if (and (= (logand u8_1 #xc0) #x80) + (= (logand u8_2 #xc0) #x80) + (= (logand u8_3 #xc0) #x80) + (case u8_0 + ((#xf0) (>= u8_1 #x90)) + ((#xf4) (>= u8_1 #x8f)) + (else #t))) + (kt (integer->char + (logior (ash (logand u8_0 #x07) 18) + (ash (logand u8_1 #x3f) 12) + (ash (logand u8_2 #x3f) 6) + (logand u8_3 #x3f))) + 4) + (kf)))) + (else (kf)))) + +(define (bad-utf8-len bv cur buffering first-byte) + (define (ref n) + (bytevector-u8-ref bv (+ cur 1))) + (cond + ((< first-byte #x80) 0) + ((<= #xc2 first-byte #xdf) + (cond + ((< buffering 2) 1) + ((not (= (logand (ref 1) #xc0) #x80)) 1) + (else 0))) + ((= (logand first-byte #xf0) #xe0) + (cond + ((< buffering 2) 1) + ((not (= (logand (ref 1) #xc0) #x80)) 1) + ((and (eq? first-byte #xe0) (< (ref 1) #xa0)) 1) + ((and (eq? first-byte #xed) (< (ref 1) #x9f)) 1) + ((< buffering 3) 2) + ((not (= (logand (ref 2) #xc0) #x80)) 2) + (else 0))) + ((<= #xf0 first-byte #xf4) + (cond + ((< buffering 2) 1) + ((not (= (logand (ref 1) #xc0) #x80)) 1) + ((and (eq? first-byte #xf0) (< (ref 1) #x90)) 1) + ((and (eq? first-byte #xf4) (< (ref 1) #x8f)) 1) + ((< buffering 3) 2) + ((not (= (logand (ref 2) #xc0) #x80)) 2) + ((< buffering 4) 3) + ((not (= (logand (ref 3) #xc0) #x80)) 3) + (else 0))) + (else 1))) + (define (peek-char-and-len/utf8 port first-byte) (define (bad-utf8 len) (if (eq? (port-conversion-strategy port) 'substitute) (values #\? len) (decoding-error "peek-char" port))) - (cond - ((< first-byte #x80) - (values (integer->char first-byte) 1)) - ((<= #xc2 first-byte #xdf) - (call-with-values (lambda () (fill-input port 2)) - (lambda (buf buffering) - (let ((bv (port-buffer-bytevector buf)) - (cur (port-buffer-cur buf))) - (define (ref n) - (bytevector-u8-ref bv (+ cur 1))) - (when (or (< buffering 2) - (not (= (logand (ref 1) #xc0) #x80))) - (bad-utf8 1)) - (values (integer->char - (logior (ash (logand first-byte #x1f) 6) - (logand (ref 1) #x3f))) - 2))))) - ((= (logand first-byte #xf0) #xe0) - (call-with-values (lambda () (fill-input port 3)) - (lambda (buf buffering) - (let ((bv (port-buffer-bytevector buf)) - (cur (port-buffer-cur buf))) - (define (ref n) - (bytevector-u8-ref bv (+ cur 1))) - (when (or (< buffering 2) - (not (= (logand (ref 1) #xc0) #x80)) - (and (eq? first-byte #xe0) (< (ref 1) #xa0)) - (and (eq? first-byte #xed) (< (ref 1) #x9f))) - (bad-utf8 1)) - (when (or (< buffering 3) - (not (= (logand (ref 2) #xc0) #x80))) - (bad-utf8 2)) - (values (integer->char - (logior (ash (logand first-byte #x0f) 12) - (ash (logand (ref 1) #x3f) 6) - (logand (ref 2) #x3f))) - 3))))) - ((<= #xf0 first-byte #xf4) - (call-with-values (lambda () (fill-input port 4)) - (lambda (buf buffering) - (let ((bv (port-buffer-bytevector buf)) - (cur (port-buffer-cur buf))) - (define (ref n) - (bytevector-u8-ref bv (+ cur 1))) - (when (or (< buffering 2) - (not (= (logand (ref 1) #xc0) #x80)) - (and (eq? first-byte #xf0) (< (ref 1) #x90)) - (and (eq? first-byte #xf4) (< (ref 1) #x8f))) - (bad-utf8 1)) - (when (or (< buffering 3) - (not (= (logand (ref 2) #xc0) #x80))) - (bad-utf8 2)) - (when (or (< buffering 4) - (not (= (logand (ref 3) #xc0) #x80))) - (bad-utf8 3)) - (values (integer->char - (logior (ash (logand first-byte #x07) 18) - (ash (logand (ref 1) #x3f) 12) - (ash (logand (ref 2) #x3f) 6) - (logand (ref 3) #x3f))) - 4))))) - (else - (bad-utf8 1)))) - -(define-inlinable (peek-char-and-len/iso-8859-1 port first-byte) + (if (< first-byte #x80) + (values (integer->char first-byte) 1) + (call-with-values (lambda () + (fill-input port + (cond + ((<= #xc2 first-byte #xdf) 2) + ((= (logand first-byte #xf0) #xe0) 3) + (else 4)))) + (lambda (buf buffering) + (let* ((bv (port-buffer-bytevector buf)) + (cur (port-buffer-cur buf))) + (define (bad-utf8) + (let ((len (bad-utf8-len bv cur buffering first-byte))) + (when (zero? len) (error "internal error")) + (if (eq? (port-conversion-strategy port) 'substitute) + (values #\? len) + (decoding-error "peek-char" port)))) + (decode-utf8 bv cur buffering first-byte values bad-utf8)))))) + +(define (peek-char-and-len/iso-8859-1 port first-byte) (values (integer->char first-byte) 1)) (define (peek-char-and-len/iconv port first-byte) @@ -398,25 +438,33 @@ interpret its input and output." (else (lp input-size)))))) -(define-inlinable (peek-char-and-len port) +(define (peek-char-and-len port) (let ((first-byte (peek-byte port))) - (if (eq? first-byte the-eof-object) - (values first-byte 0) - (let ((first-byte (logand first-byte #xff))) - (case (%port-encoding port) - ((UTF-8) - (if (< first-byte #x80) - (values (integer->char first-byte) 1) - (peek-char-and-len/utf8 port first-byte))) - ((ISO-8859-1) - (peek-char-and-len/iso-8859-1 port first-byte)) - (else - (peek-char-and-len/iconv port first-byte))))))) + (if (not first-byte) + (values the-eof-object 0) + (case (%port-encoding port) + ((UTF-8) + (peek-char-and-len/utf8 port first-byte)) + ((ISO-8859-1) + (peek-char-and-len/iso-8859-1 port first-byte)) + (else + (peek-char-and-len/iconv port first-byte)))))) (define (%peek-char port) - (call-with-values (lambda () (peek-char-and-len port)) - (lambda (char len) - char))) + (define (slow-path) + (call-with-values (lambda () (peek-char-and-len port)) + (lambda (char len) + char))) + (define (fast-path buf bv cur buffered) + (let ((u8 (bytevector-u8-ref bv cur)) + (enc (%port-encoding port))) + (case enc + ((UTF-8) (decode-utf8 bv cur buffered u8 (lambda (char len) char) + slow-path)) + ((ISO-8859-1) (integer->char u8)) + (else (slow-path))))) + (peek-bytes port 1 fast-path + (lambda (buf bv cur buffered) (slow-path)))) |