summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristopher Baines <mail@cbaines.net>2023-03-20 09:15:13 +0000
committerLudovic Courtès <ludo@gnu.org>2023-03-20 23:23:29 +0100
commitcba2e7e3fec3c781230570f5d1ef070625eeeda8 (patch)
treecad0acad118be78cc159381c8b14868550246abc
parentffb95239aacf86d8dc622a438bdaacfac4a66efc (diff)
downloadguile-cba2e7e3fec3c781230570f5d1ef070625eeeda8.tar.gz
Fix some invalid unicode handling issues with suspendable ports.
Fixes <https://bugs.gnu.org/62290>. Based on the implementation in ports.c. I don't understand what this code is really doing, but the suspendable ports implementation differs from the similar C code for a couple of inequalities. * module/ice-9/suspendable-ports.scm (decode-utf8, bad-utf8-len): Flip a couple of inequalities. * test-suite/tests/ports.test ("string ports"): Add additional invalid UTF-8 test case. * NEWS: Update. Signed-off-by: Ludovic Courtès <ludo@gnu.org>
-rw-r--r--NEWS3
-rw-r--r--module/ice-9/suspendable-ports.scm8
-rw-r--r--test-suite/tests/ports.test7
3 files changed, 14 insertions, 4 deletions
diff --git a/NEWS b/NEWS
index a55cb583b..167b0f2ad 100644
--- a/NEWS
+++ b/NEWS
@@ -23,6 +23,9 @@ the compiler reports it as "possibly unused".
* Bug fixes
+** (ice-9 suspendable-ports) incorrect UTF-8 decoding
+ (https://bugs.gnu.org/62290)
+
* Hashing of UTF-8 symbols with non-ASCII characters avoids corruption
This issue could cause `scm_from_utf8_symbol' and
diff --git a/module/ice-9/suspendable-ports.scm b/module/ice-9/suspendable-ports.scm
index a823f1d37..9fac1df62 100644
--- a/module/ice-9/suspendable-ports.scm
+++ b/module/ice-9/suspendable-ports.scm
@@ -419,7 +419,7 @@
(= (logand u8_2 #xc0) #x80)
(case u8_0
((#xe0) (>= u8_1 #xa0))
- ((#xed) (>= u8_1 #x9f))
+ ((#xed) (<= u8_1 #x9f))
(else #t)))
(kt (integer->char
(logior (ash (logand u8_0 #x0f) 12)
@@ -436,7 +436,7 @@
(= (logand u8_3 #xc0) #x80)
(case u8_0
((#xf0) (>= u8_1 #x90))
- ((#xf4) (>= u8_1 #x8f))
+ ((#xf4) (<= u8_1 #x8f))
(else #t)))
(kt (integer->char
(logior (ash (logand u8_0 #x07) 18)
@@ -462,7 +462,7 @@
((< buffering 2) 1)
((not (= (logand (ref 1) #xc0) #x80)) 1)
((and (eq? first-byte #xe0) (< (ref 1) #xa0)) 1)
- ((and (eq? first-byte #xed) (< (ref 1) #x9f)) 1)
+ ((and (eq? first-byte #xed) (> (ref 1) #x9f)) 1)
((< buffering 3) 2)
((not (= (logand (ref 2) #xc0) #x80)) 2)
(else 0)))
@@ -471,7 +471,7 @@
((< buffering 2) 1)
((not (= (logand (ref 1) #xc0) #x80)) 1)
((and (eq? first-byte #xf0) (< (ref 1) #x90)) 1)
- ((and (eq? first-byte #xf4) (< (ref 1) #x8f)) 1)
+ ((and (eq? first-byte #xf4) (> (ref 1) #x8f)) 1)
((< buffering 3) 2)
((not (= (logand (ref 2) #xc0) #x80)) 2)
((< buffering 4) 3)
diff --git a/test-suite/tests/ports.test b/test-suite/tests/ports.test
index 66e10e3dd..1b30e1a68 100644
--- a/test-suite/tests/ports.test
+++ b/test-suite/tests/ports.test
@@ -1063,6 +1063,13 @@
error ;; 88: not a valid starting byte
error ;; 88: not a valid starting byte
error ;; 88: not a valid starting byte
+ eof))
+
+ (test-decoding-error (#xf4 #xa4 #xbd #xa4) "UTF-8"
+ (error ;; 2nd byte should be in the 90..BF range
+ error ;; 88: not a valid starting byte
+ error ;; 88: not a valid starting byte
+ error ;; 88: not a valid starting byte
eof))))
(with-test-prefix "call-with-output-string"