summary | refs | log | tree | commit | diff
path: root/lisp/international/utf-16.el
diff options
context:
space:
mode:
author: Kenichi Handa <handa@m17n.org> 2003-03-31 01:48:48 +0000
committer: Kenichi Handa <handa@m17n.org> 2003-03-31 01:48:48 +0000
commit: 2217b8e1e67a18aec2c1693038d21a12b1d02f76 (patch)
tree: 8031fb1dd569147aa23d1b586e67a111ba2a1196 /lisp/international/utf-16.el
parent: c0e7fbb88eaff9808ec4a6ca058846d5f5c0c872 (diff)
download: emacs-2217b8e1e67a18aec2c1693038d21a12b1d02f76.tar.gz
(ccl-decode-mule-utf-16-le): Don't assume the signature bytes.
(ccl-decode-mule-utf-16-be): Likewise. (ccl-encode-mule-utf-16-le): Don't produce the signature bytes. (ccl-encode-mule-utf-16-be): Likewise.
Diffstat (limited to 'lisp/international/utf-16.el')
-rw-r--r-- lisp/international/utf-16.el | 29
1 file changed, 11 insertions, 18 deletions
diff --git a/lisp/international/utf-16.el b/lisp/international/utf-16.el
index e9b7c801f91..6359648e40c 100644
--- a/lisp/international/utf-16.el
+++ b/lisp/international/utf-16.el
@@ -68,8 +68,9 @@
;; Needed in macro expansion, so can't be let-bound. Zapped after use.
(eval-and-compile
(defconst utf-16-decode-ucs
- ;; We have the unicode in r1. Output is charset ID in r0, code point
- ;; in r1.
+ ;; We have the unicode in r1. Output is charset ID in r0, code
+ ;; point in r1. As r6 keeps endian information, the value should
+ ;; not be changed.
`((lookup-integer utf-subst-table-for-decode r1 r3)
(if r7 ; got a translation
((r0 = r1) (r1 = r3))
@@ -114,15 +115,14 @@
(define-ccl-program ccl-decode-mule-utf-16-le
`(2 ; 2 bytes -> 1 to 4 bytes
- ((read r0 r1) ; signature
- (loop
+ ((loop
(read r3 r4)
(r1 = (r4 <8 r3))
,utf-16-decode-ucs
(translate-character utf-translation-table-for-decode r0 r1)
(write-multibyte-character r0 r1)
(repeat))))
- "Decode little endian UTF-16 (ignoring signature bytes).
+ "Decode UTF-16LE (little endian without signature bytes).
Basic decoding is done into the charsets ascii, latin-iso8859-1 and
mule-unicode-*. Un-representable Unicode characters are decoded as
U+fffd. The result is run through the translation-table named
@@ -130,15 +130,14 @@ U+fffd. The result is run through the translation-table named
(define-ccl-program ccl-decode-mule-utf-16-be
`(2 ; 2 bytes -> 1 to 4 bytes
- ((read r0 r1) ; signature
- (loop
+ ((loop
(read r3 r4)
(r1 = (r3 <8 r4))
,utf-16-decode-ucs
(translate-character utf-translation-table-for-decode r0 r1)
(write-multibyte-character r0 r1)
(repeat))))
- "Decode big endian UTF-16 (ignoring signature bytes).
+ "Decode UTF-16BE (big endian without signature bytes).
Basic decoding is done into the charsets ascii, latin-iso8859-1 and
mule-unicode-*. Un-representable Unicode characters are
decoded as U+fffd. The result is run through the translation-table of
@@ -173,9 +172,7 @@ name `utf-translation-table-for-decode'.")
(define-ccl-program ccl-encode-mule-utf-16-le
`(1
- ((write #xff)
- (write #xfe)
- (loop
+ ((loop
(read-multibyte-character r0 r1)
(lookup-character utf-subst-table-for-encode r0 r1)
(if (r7 == 0)
@@ -184,7 +181,7 @@ name `utf-translation-table-for-decode'.")
(write (r0 & 255))
(write (r0 >> 8))
(repeat))))
- "Encode to little endian UTF-16 with signature.
+ "Encode to UTF-16LE (little endian without signature).
Characters from the charsets ascii, eight-bit-control,
eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
after translation through the translation-table of name
@@ -193,9 +190,7 @@ Others are encoded as U+FFFD.")
(define-ccl-program ccl-encode-mule-utf-16-be
`(1
- ((write #xfe)
- (write #xff)
- (loop
+ ((loop
(read-multibyte-character r0 r1)
(lookup-character utf-subst-table-for-encode r0 r1)
(if (r7 == 0)
@@ -204,7 +199,7 @@ Others are encoded as U+FFFD.")
(write (r0 >> 8))
(write (r0 & 255))
(repeat))))
- "Encode to big endian UTF-16 with signature.
+ "Encode to UTF-16BE (big endian without signature).
Characters from the charsets ascii, eight-bit-control,
eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
after translation through the translation-table named
@@ -215,8 +210,6 @@ Others are encoded as U+FFFD.")
(let ((doc "
-Assumes and ignores the leading two-byte signature.
-
It supports Unicode characters of these ranges:
U+0000..U+33FF, U+E000..U+FFFF.
They correspond to these Emacs character sets: