diff options
author | Kenichi Handa <handa@m17n.org> | 2003-09-08 12:53:41 +0000 |
---|---|---|
committer | Kenichi Handa <handa@m17n.org> | 2003-09-08 12:53:41 +0000 |
commit | 8f924df7df019cce90537647de2627581043b5c4 (patch) | |
tree | 6c40bd05679425e710d6b2e5649eae3da5e40a52 /lisp/language | |
parent | 463f5630a5e7cbe7f042bc1175d1fa1c4e98860f (diff) | |
parent | 9d4807432a01f9b3cc519fcfa3ea92a70ffa7f43 (diff) | |
download | emacs-8f924df7df019cce90537647de2627581043b5c4.tar.gz |
*** empty log message ***
Diffstat (limited to 'lisp/language')
31 files changed, 1753 insertions, 2682 deletions
diff --git a/lisp/language/china-util.el b/lisp/language/china-util.el index 57590d8a665..3f6dee9488f 100644 --- a/lisp/language/china-util.el +++ b/lisp/language/china-util.el @@ -1,8 +1,11 @@ ;;; china-util.el --- utilities for Chinese -*- coding: iso-2022-7bit -*- ;; Copyright (C) 1995, 2003 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. +;; Licensed to the Free Software Foundation. ;; Copyright (C) 1995, 2001 Free Software Foundation, Inc. +;; Copyright (C) 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 ;; Keywords: mule, multilingual, Chinese @@ -165,255 +168,25 @@ Return the length of resulting text." (interactive) (encode-hz-region (point-min) (point-max))) -;; The following sets up a translation table (big5-to-cns) from Big 5 -;; to CNS encoding, using some auxiliary functions to make the code -;; more readable. - -;; Many kudos to Himi! The used code has been adapted from his -;; mule-ucs package. - -(eval-when-compile -(defun big5-to-flat-code (num) - "Convert NUM in Big 5 encoding to a `flat code'. -0xA140 will be mapped to position 0, 0xA141 to position 1, etc. -There are no gaps in the flat code." - - (let ((hi (/ num 256)) - (lo (% num 256))) - (+ (* 157 (- hi #xa1)) - (- lo (if (>= lo #xa1) 98 64))))) - -(defun flat-code-to-big5 (num) - "Convert NUM from a `flat code' to Big 5 encoding. -This is the inverse function of `big5-to-flat-code'." - - (let ((hi (/ num 157)) - (lo (% num 157))) - (+ (* 256 (+ hi #xa1)) - (+ lo (if (< lo 63) 64 98))))) - -(defun euc-to-flat-code (num) - "Convert NUM in EUC encoding (in GL representation) to a `flat code'. -0x2121 will be mapped to position 0, 0x2122 to position 1, etc. -There are no gaps in the flat code." - - (let ((hi (/ num 256)) - (lo (% num 256))) - (+ (* 94 (- hi #x21)) - (- lo #x21)))) - -(defun flat-code-to-euc (num) - "Convert NUM from a `flat code' to EUC encoding (in GL representation). 
-The inverse function of `euc-to-flat-code'. The high and low bytes are -returned in a list." - - (let ((hi (/ num 94)) - (lo (% num 94))) - (list (+ hi #x21) (+ lo #x21)))) - -(defun expand-euc-big5-alist (alist) - "Create a translation table and fills it with data given in ALIST. -Elements of ALIST can be either given as - - ((euc-charset . startchar) . (big5-range-begin . big5-range-end)) - -or as - - (euc-character . big5-charcode) - -The former maps a range of glyphs in an EUC charset (where STARTCHAR -is in GL representation) to a certain range of Big 5 encoded -characters, the latter maps a single glyph. Glyphs which can't be -mapped will be represented with the byte 0xFF. - -The return value is the filled translation table." - - (let ((chartable (make-char-table 'translation-table #xFF)) - char - big5 - i - end - codepoint - charset) - (dolist (elem alist) - (setq char (car elem) - big5 (cdr elem)) - (cond ((and (consp char) - (consp big5)) - (setq i (big5-to-flat-code (car big5)) - end (big5-to-flat-code (cdr big5)) - codepoint (euc-to-flat-code (cdr char)) - charset (car char)) - (while (>= end i) - (aset chartable - (decode-big5-char (flat-code-to-big5 i)) - (apply (function make-char) - charset - (flat-code-to-euc codepoint))) - (setq i (1+ i) - codepoint (1+ codepoint)))) - ((and (char-valid-p char) - (numberp big5)) - (setq i (decode-big5-char big5)) - (aset chartable i char)) - (t - (error "Unknown slot type: %S" elem)))) - ;; the return value - chartable))) - -;; All non-CNS encodings are commented out. - -(define-translation-table 'big5-to-cns - (eval-when-compile - (expand-euc-big5-alist - '( - ;; Symbols - ((chinese-cns11643-1 . #x2121) . (#xA140 . #xA1F5)) - (?$(G"X(B . #xA1F6) - (?$(G"W(B . #xA1F7) - ((chinese-cns11643-1 . #x2259) . (#xA1F8 . #xA2AE)) - ((chinese-cns11643-1 . #x2421) . (#xA2AF . #xA3BF)) - ;; Control codes (vendor dependent) - ((chinese-cns11643-1 . #x4221) . (#xA3C0 . #xA3E0)) - ;; Level 1 Ideographs - ((chinese-cns11643-1 . 
#x4421) . (#xA440 . #xACFD)) - (?$(GWS(B . #xACFE) - ((chinese-cns11643-1 . #x5323) . (#xAD40 . #xAFCF)) - ((chinese-cns11643-1 . #x5754) . (#xAFD0 . #xBBC7)) - ((chinese-cns11643-1 . #x6B51) . (#xBBC8 . #xBE51)) - (?$(GkP(B . #xBE52) - ((chinese-cns11643-1 . #x6F5C) . (#xBE53 . #xC1AA)) - ((chinese-cns11643-1 . #x7536) . (#xC1AB . #xC2CA)) - (?$(Gu5(B . #xC2CB) - ((chinese-cns11643-1 . #x7737) . (#xC2CC . #xC360)) - ((chinese-cns11643-1 . #x782E) . (#xC361 . #xC3B8)) - (?$(Gxe(B . #xC3B9) - (?$(Gxd(B . #xC3BA) - ((chinese-cns11643-1 . #x7866) . (#xC3BB . #xC455)) - (?$(Gx-(B . #xC456) - ((chinese-cns11643-1 . #x7962) . (#xC457 . #xC67E)) - ;; Symbols - ((chinese-cns11643-1 . #x2621) . (#xC6A1 . #xC6BE)) - ;; Radicals - (?$(G'#(B . #xC6BF) - (?$(G'$(B . #xC6C0) - (?$(G'&(B . #xC6C1) - (?$(G'((B . #xC6C2) - (?$(G'-(B . #xC6C3) - (?$(G'.(B . #xC6C4) - (?$(G'/(B . #xC6C5) - (?$(G'4(B . #xC6C6) - (?$(G'7(B . #xC6C7) - (?$(G':(B . #xC6C8) - (?$(G'<(B . #xC6C9) - (?$(G'B(B . #xC6CA) - (?$(G'G(B . #xC6CB) - (?$(G'N(B . #xC6CC) - (?$(G'S(B . #xC6CD) - (?$(G'T(B . #xC6CE) - (?$(G'U(B . #xC6CF) - (?$(G'Y(B . #xC6D0) - (?$(G'Z(B . #xC6D1) - (?$(G'a(B . #xC6D2) - (?$(G'f(B . #xC6D3) - (?$(G()(B . #xC6D4) - (?$(G(*(B . #xC6D5) - (?$(G(c(B . #xC6D6) - (?$(G(l(B . #xC6D7) - ;; Diacritical Marks - ; ((japanese-jisx0208 . #x212F) . (#xC6D8 . #xC6D9)) - ;; Japanese Kana Supplement - ; ((japanese-jisx0208 . #x2133) . (#xC6DA . #xC6E3)) - ;; Japanese Hiragana - ; ((japanese-jisx0208 . #x2421) . (#xC6E7 . #xC77A)) - ;; Japanese Katakana - ; ((japanese-jisx0208 . #x2521) . (#xC77B . #xC7F2)) - ;; Cyrillic Characters - ; ((japanese-jisx0208 . #x2721) . (#xC7F3 . #xC854)) - ; ((japanese-jisx0208 . #x2751) . (#xC855 . #xC875)) - ;; Special Chinese Characters - (?$(J!#(B . #xC879) - (?$(J!$(B . #xC87B) - (?$(J!*(B . #xC87D) - (?$(J!R(B . #xC8A2) - - ;; JIS X 0208 NOT SIGN (cf. U+00AC) - ; (?$B"L(B . #xC8CD) - ;; JIS X 0212 BROKEN BAR (cf. U+00A6) - ; (?$(D"C(B . 
#xC8CE) - - ;; GB 2312 characters - ; (?$A!d(B . #xC8CF) - ; (?$A!e(B . #xC8D0) - ;;;;; C8D1 - Japanese `($B3t(B)' - ; (?$A!m(B . #xC8D2) - ;;;;; C8D2 - Tel. - - ;; Level 2 Ideographs - ((chinese-cns11643-2 . #x2121) . (#xC940 . #xC949)) - (?$(GDB(B . #xC94A);; a duplicate of #xA461 - ((chinese-cns11643-2 . #x212B) . (#xC94B . #xC96B)) - ((chinese-cns11643-2 . #x214D) . (#xC96C . #xC9BD)) - (?$(H!L(B . #xC9BE) - ((chinese-cns11643-2 . #x217D) . (#xC9BF . #xC9EC)) - ((chinese-cns11643-2 . #x224E) . (#xC9ED . #xCAF6)) - (?$(H"M(B . #xCAF7) - ((chinese-cns11643-2 . #x2439) . (#xCAF8 . #xD6CB)) - (?$(H>c(B . #xD6CC) - ((chinese-cns11643-2 . #x3770) . (#xD6CD . #xD779)) - (?$(H?j(B . #xD77A) - ((chinese-cns11643-2 . #x387E) . (#xD77B . #xDADE)) - (?$(H7o(B . #xDADF) - ((chinese-cns11643-2 . #x3E64) . (#xDAE0 . #xDBA6)) - ((chinese-cns11643-2 . #x3F6B) . (#xDBA7 . #xDDFB)) - (?$(HAv(B . #xDDFC);; a duplicate of #xDCD1 - ((chinese-cns11643-2 . #x4424) . (#xDDFD . #xE8A2)) - ((chinese-cns11643-2 . #x554C) . (#xE8A3 . #xE975)) - ((chinese-cns11643-2 . #x5723) . (#xE976 . #xEB5A)) - ((chinese-cns11643-2 . #x5A29) . (#xEB5B . #xEBF0)) - (?$(HUK(B . #xEBF1) - ((chinese-cns11643-2 . #x5B3F) . (#xEBF2 . #xECDD)) - (?$(HW"(B . #xECDE) - ((chinese-cns11643-2 . #x5C6A) . (#xECDF . #xEDA9)) - ((chinese-cns11643-2 . #x5D75) . (#xEDAA . #xEEEA)) - (?$(Hd/(B . #xEEEB) - ((chinese-cns11643-2 . #x6039) . (#xEEEC . #xF055)) - (?$(H]t(B . #xF056) - ((chinese-cns11643-2 . #x6243) . (#xF057 . #xF0CA)) - (?$(HZ((B . #xF0CB) - ((chinese-cns11643-2 . #x6337) . (#xF0CC . #xF162)) - ((chinese-cns11643-2 . #x6430) . (#xF163 . #xF16A)) - (?$(Hga(B . #xF16B) - ((chinese-cns11643-2 . #x6438) . (#xF16C . #xF267)) - (?$(Hi4(B . #xF268) - ((chinese-cns11643-2 . #x6573) . (#xF269 . #xF2C2)) - ((chinese-cns11643-2 . #x664E) . (#xF2C3 . #xF374)) - ((chinese-cns11643-2 . #x6762) . (#xF375 . #xF465)) - ((chinese-cns11643-2 . #x6935) . (#xF466 . #xF4B4)) - (?$(HfM(B . #xF4B5) - ((chinese-cns11643-2 . 
#x6962) . (#xF4B6 . #xF4FC)) - ((chinese-cns11643-2 . #x6A4C) . (#xF4FD . #xF662)) - (?$(HjK(B . #xF663) - ((chinese-cns11643-2 . #x6C52) . (#xF664 . #xF976)) - ((chinese-cns11643-2 . #x7167) . (#xF977 . #xF9C3)) - (?$(Hqf(B . #xF9C4) - (?$(Hr4(B . #xF9C5) - (?$(Hr@(B . #xF9C6) - ((chinese-cns11643-2 . #x7235) . (#xF9C7 . #xF9D1)) - ((chinese-cns11643-2 . #x7241) . (#xF9D2 . #xF9D5)) - - ;; Additional Ideographs - (?$(IC7(B . #xF9D6) - (?$(IOP(B . #xF9D7) - (?$(IDN(B . #xF9D8) - (?$(IPJ(B . #xF9D9) - (?$(I,](B . #xF9DA) - (?$(I=~(B . #xF9DB) - (?$(IK\(B . #xF9DC) - ) - )) -) +;;;###autoload +(defun post-read-decode-hz (len) + (let ((pos (point)) + (buffer-modified-p (buffer-modified-p)) + last-coding-system-used) + (prog1 + (decode-hz-region pos (+ pos len)) + (set-buffer-modified-p buffer-modified-p)))) +;;;###autoload +(defun pre-write-encode-hz (from to) + (let ((buf (current-buffer))) + (set-buffer (generate-new-buffer " *temp*")) + (if (stringp from) + (insert from) + (insert-buffer-substring buf from to)) + (let (last-coding-system-used) + (encode-hz-region 1 (point-max))) + nil)) ;; (provide 'china-util) diff --git a/lisp/language/chinese.el b/lisp/language/chinese.el index 14546edbf89..37739399c89 100644 --- a/lisp/language/chinese.el +++ b/lisp/language/chinese.el @@ -1,7 +1,10 @@ ;;; chinese.el --- support for Chinese -*- coding: iso-2022-7bit; -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. +;; Licensed to the Free Software Foundation. +;; Copyright (C) 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 ;; Keywords: multilingual, Chinese @@ -33,47 +36,51 @@ ;;; Chinese (general) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(make-coding-system - 'iso-2022-cn 2 ?C - "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)." 
- '(ascii - (nil chinese-gb2312 chinese-cns11643-1) - (nil chinese-cns11643-2) - nil - nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil - init-bol) - '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2) - (mime-charset . iso-2022-cn))) + +(define-coding-system 'iso-2022-cn + "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)." + :coding-type 'iso-2022 + :mnemonic ?C + :charset-list '(ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2) + :designation [ascii + (nil chinese-gb2312 chinese-cns11643-1) + (nil chinese-cns11643-2) + nil] + :flags '(ascii-at-eol ascii-at-cntl 7-bit + designation locking-shift single-shift init-at-bol) + :mime-charset 'iso-2022-cn) (define-coding-system-alias 'chinese-iso-7bit 'iso-2022-cn) -(make-coding-system - 'iso-2022-cn-ext 2 ?C - "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)." - '(ascii - (nil chinese-gb2312 chinese-cns11643-1) - (nil chinese-cns11643-2) - (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 - chinese-cns11643-6 chinese-cns11643-7) - nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil - init-bol) - '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2 - chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 - chinese-cns11643-6 chinese-cns11643-7) - (mime-charset . iso-2022-cn-ext))) +(define-coding-system 'iso-2022-cn-ext + "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)." 
+ :coding-type 'iso-2022 + :mnemonic ?C + :charset-list '(ascii + chinese-gb2312 chinese-cns11643-1 + chinese-cns11643-2 chinese-cns11643-3 chinese-cns11643-4 + chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7) + :designation '[ascii + (nil chinese-gb2312 chinese-cns11643-1) + (nil chinese-cns11643-2) + (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 + chinese-cns11643-6 chinese-cns11643-7)] + :flags '(ascii-at-eol ascii-at-cntl 7-bit + designation locking-shift single-shift init-at-bol) + :mime-charset 'iso-2022-cn-ext) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Chinese GB2312 (simplified) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(make-coding-system - 'chinese-iso-8bit 2 ?c - "ISO 2022 based EUC encoding for Chinese GB2312 (MIME:GB2312)." - '(ascii chinese-gb2312 nil nil - nil ascii-eol ascii-cntl nil nil nil nil) - '((safe-charsets ascii chinese-gb2312) - (mime-charset . gb2312))) +(define-coding-system 'chinese-iso-8bit + "ISO 2022 based EUC encoding for Chinese GB2312 (MIME:CN-GB)." + :coding-type 'iso-2022 + :mnemonic ?c + :charset-list '(ascii chinese-gb2312) + :designation [ascii chinese-gb2312 nil nil] + :mime-charset 'cn-gb) (define-coding-system-alias 'cn-gb-2312 'chinese-iso-8bit) (define-coding-system-alias 'euc-china 'chinese-iso-8bit) @@ -81,36 +88,18 @@ (define-coding-system-alias 'cn-gb 'chinese-iso-8bit) (define-coding-system-alias 'gb2312 'chinese-iso-8bit) -(make-coding-system - 'chinese-hz 0 ?z - "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)." - nil - '((safe-charsets ascii chinese-gb2312) - (mime-charset . hz-gb-2312) - (post-read-conversion . post-read-decode-hz) - (pre-write-conversion . pre-write-encode-hz))) +(define-coding-system 'chinese-hz + "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)." 
+ :coding-type 'utf-8 + :mnemonic ?z + :charset-list '(ascii chinese-gb2312) + :mime-charset 'hz-gb-2312 + :post-read-conversion 'post-read-decode-hz + :pre-write-conversion 'pre-write-encode-hz) (define-coding-system-alias 'hz-gb-2312 'chinese-hz) (define-coding-system-alias 'hz 'chinese-hz) -(defun post-read-decode-hz (len) - (let ((pos (point)) - (buffer-modified-p (buffer-modified-p)) - last-coding-system-used) - (prog1 - (decode-hz-region pos (+ pos len)) - (set-buffer-modified-p buffer-modified-p)))) - -(defun pre-write-encode-hz (from to) - (let ((buf (current-buffer))) - (set-buffer (generate-new-buffer " *temp*")) - (if (stringp from) - (insert from) - (insert-buffer-substring buf from to)) - (let (last-coding-system-used) - (encode-hz-region 1 (point-max))) - nil)) - (set-language-info-alist "Chinese-GB" '((charset chinese-gb2312 chinese-sisheng) (coding-system chinese-iso-8bit iso-2022-cn chinese-hz) @@ -125,36 +114,16 @@ ;; Chinese BIG5 (traditional) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(make-coding-system - 'chinese-big5 3 ?B - "BIG5 8-bit encoding for Chinese (MIME:Big5)." - nil - '((safe-charsets ascii chinese-big5-1 chinese-big5-2) - (mime-charset . big5) - (charset-origin-alist (chinese-big5-1 "BIG5" encode-big5-char) - (chinese-big5-2 "BIG5" encode-big5-char)))) +(define-coding-system 'chinese-big5 + "BIG5 8-bit encoding for Chinese (MIME:Big5)" + :coding-type 'charset + :mnemonic ?B + :charset-list '(ascii big5) + :mime-charset 'big5) (define-coding-system-alias 'big5 'chinese-big5) (define-coding-system-alias 'cn-big5 'chinese-big5) -;; Big5 font requires special encoding. 
-(define-ccl-program ccl-encode-big5-font - `(0 - ;; In: R0:chinese-big5-1 or chinese-big5-2 - ;; R1:position code 1 - ;; R2:position code 2 - ;; Out: R1:font code point 1 - ;; R2:font code point 2 - ((r2 = ((((r1 - ?\x21) * 94) + r2) - ?\x21)) - (if (r0 == ,(charset-id 'chinese-big5-2)) (r2 += 6280)) - (r1 = ((r2 / 157) + ?\xA1)) - (r2 %= 157) - (if (r2 < ?\x3F) (r2 += ?\x40) (r2 += ?\x62)))) - "CCL program to encode a Big5 code to code point of Big5 font.") - -(setq font-ccl-encoder-alist - (cons (cons "big5" ccl-encode-big5-font) font-ccl-encoder-alist)) - (set-language-info-alist "Chinese-BIG5" '((charset chinese-big5-1 chinese-big5-2) (coding-system chinese-big5 chinese-iso-7bit) @@ -165,148 +134,39 @@ (documentation . "Support for Chinese Big5 character set.")) '("Chinese")) +(define-coding-system 'chinese-big5-hkscs + "BIG5-HKSCS 8-bit encoding for Chinese, Hong Kong supplement (MIME:Big5-HKSCS)" + :coding-type 'charset + :mnemonic ?B + :charset-list '(ascii big5-hkscs) + :mime-charset 'big5-hkscs) +(define-coding-system-alias 'big5-hkscs 'chinese-big5-hkscs) +(define-coding-system-alias 'cn-big5-hkscs 'chinese-big5-hkscs) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Chinese CNS11643 (traditional) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(defvar big5-to-cns (make-translation-table) - "Translation table for encoding to `euc-tw'.") -;; Could have been done by china-util loaded before. -(unless (get 'big5-to-cns 'translation-table) - (define-translation-table 'big5-to-cns big5-to-cns)) - -(define-ccl-program ccl-decode-euc-tw - ;; CNS plane 1 needs either two or four bytes in EUC-TW encoding; - ;; CNS planes 2 to 7 always need four bytes. In internal encoding of - ;; Emacs, CNS planes 1 and 2 need three bytes, and planes 3 to 7 need - ;; four bytes. Thus a buffer magnification value of 2 (for both - ;; encoding and decoding) is sufficient. 
- `(2 - ;; we don't have enough registers to hold all charset-ids - ((r4 = ,(charset-id 'chinese-cns11643-1)) - (r5 = ,(charset-id 'chinese-cns11643-2)) - (r6 = ,(charset-id 'chinese-cns11643-3)) - (loop - (read-if (r0 < #x80) - ;; ASCII - (write-repeat r0) - ;; not ASCII - (if (r0 == #x8E) - ;; single shift - (read-if (r1 < #xA1) - ;; invalid byte - ((write r0) - (write-repeat r1)) - (if (r1 > #xA7) - ;; invalid plane - ((write r0) - (write-repeat r1)) - ;; OK, we have a plane - (read-if (r2 < #xA1) - ;; invalid first byte - ((write r0 r1) - (write-repeat r2)) - (read-if (r3 < #xA1) - ;; invalid second byte - ((write r0 r1 r2) - (write-repeat r3)) - ;; CNS 1-7, finally - ((branch (r1 - #xA1) - (r1 = r4) - (r1 = r5) - (r1 = r6) - (r1 = ,(charset-id 'chinese-cns11643-4)) - (r1 = ,(charset-id 'chinese-cns11643-5)) - (r1 = ,(charset-id 'chinese-cns11643-6)) - (r1 = ,(charset-id 'chinese-cns11643-7))) - (r2 = ((((r2 - #x80) << 7) + r3) - #x80)) - (write-multibyte-character r1 r2) - (repeat)))))) - ;; standard EUC - (if (r0 < #xA1) - ;; invalid first byte - (write-repeat r0) - (read-if (r1 < #xA1) - ;; invalid second byte - ((write r0) - (write-repeat r1)) - ;; CNS 1, finally - ((r1 = ((((r0 - #x80) << 7) + r1) - #x80)) - (write-multibyte-character r4 r1) - (repeat))))))))) - "CCL program to decode EUC-TW encoding." 
-) - -(define-ccl-program ccl-encode-euc-tw - `(2 - ;; we don't have enough registers to hold all charset-ids - ((r2 = ,(charset-id 'ascii)) - (r3 = ,(charset-id 'chinese-big5-1)) - (r4 = ,(charset-id 'chinese-big5-2)) - (r5 = ,(charset-id 'chinese-cns11643-1)) - (r6 = ,(charset-id 'chinese-cns11643-2)) - (loop - (read-multibyte-character r0 r1) - (if (r0 == r2) - (write-repeat r1) - (;; Big 5 encoded characters are first translated to CNS - (if (r0 == r3) - (translate-character big5-to-cns r0 r1) - (if (r0 == r4) - (translate-character big5-to-cns r0 r1))) - (if (r0 == r5) - (r0 = #xA1) - (if (r0 == r6) - (r0 = #xA2) - (if (r0 == ,(charset-id 'chinese-cns11643-3)) - (r0 = #xA3) - (if (r0 == ,(charset-id 'chinese-cns11643-4)) - (r0 = #xA4) - (if (r0 == ,(charset-id 'chinese-cns11643-5)) - (r0 = #xA5) - (if (r0 == ,(charset-id 'chinese-cns11643-6)) - (r0 = #xA6) - (if (r0 == ,(charset-id 'chinese-cns11643-7)) - (r0 = #xA7) - ;; not CNS. We use a dummy character which - ;; can't occur in EUC-TW encoding to indicate - ;; this. - (write-repeat #xFF)))))))))) - (if (r0 != #xA1) - ;; single shift and CNS plane - ((write #x8E) - (write r0))) - (write ((r1 >> 7) + #x80)) - (write ((r1 % #x80) + #x80)) - (repeat)))) - "CCL program to encode EUC-TW encoding." -) - -(defun euc-tw-pre-write-conversion (beg end) - "Semi-dummy pre-write function effectively to autoload china-util." - ;; Ensure translation table is loaded. - (require 'china-util) - ;; Don't do this again. - (coding-system-put 'euc-tw 'pre-write-conversion nil) - nil) - -(make-coding-system - 'euc-tw 4 ?Z - "ISO 2022 based EUC encoding for Chinese CNS11643. -Big5 encoding is accepted for input also (which is then converted to CNS)." - '(ccl-decode-euc-tw . ccl-encode-euc-tw) - '((safe-charsets ascii - chinese-big5-1 - chinese-big5-2 - chinese-cns11643-1 - chinese-cns11643-2 - chinese-cns11643-3 - chinese-cns11643-4 - chinese-cns11643-5 - chinese-cns11643-6 - chinese-cns11643-7) - (valid-codes (0 . 
255)) - (pre-write-conversion . euc-tw-pre-write-conversion))) +(define-coding-system 'euc-tw + "ISO 2022 based EUC encoding for Chinese CNS11643." + :coding-type 'iso-2022 + :mnemonic ?Z + :charset-list '(ascii + chinese-cns11643-1 + chinese-cns11643-2 + chinese-cns11643-3 + chinese-cns11643-4 + chinese-cns11643-5 + chinese-cns11643-6 + chinese-cns11643-7) + :designation [ascii chinese-cns11643-1 (chinese-cns11643-1 + chinese-cns11643-2 + chinese-cns11643-3 + chinese-cns11643-4 + chinese-cns11643-5 + chinese-cns11643-6 + chinese-cns11643-7) nil] + :mime-charset 'euc-tw) (define-coding-system-alias 'euc-taiwan 'euc-tw) @@ -320,6 +180,7 @@ Big5 encoding is accepted for input also (which is then converted to CNS)." chinese-iso-8bit) (features china-util) (input-method . "chinese-cns-quick") + ;; Fixme: presumably it won't accept big5 now. (documentation . "\ Support for Chinese CNS character sets. Note that the EUC-TW coding system accepts Big5 for input also (which is then converted to CNS).")) @@ -341,6 +202,53 @@ the EUC-TW coding system accepts Big5 for input also (which is then converted to CNS).")) '("Chinese")) + +;;; Chinese GBK + +(define-coding-system 'chinese-gbk + "GBK encoding for Chinese (MIME:GBK)." + :coding-type 'charset + :mnemonic ?c + :charset-list '(chinese-gbk) + :mime-charset 'gbk) +(define-coding-system-alias 'gbk 'chinese-gbk) +(define-coding-system-alias 'cp936 'chinese-gbk) +(define-coding-system-alias 'windows-936 'chinese-gbk) + +(set-language-info-alist + "Chinese-GBK" '((charset chinese-gbk) + (coding-system chinese-gbk) + (coding-priority gbk iso-2022-cn chinese-big5 + chinese-iso-8bit) ; fixme? + (input-method . "chinese-py-punct") ; fixme? + (features china-util) + (documentation . "Support for Chinese GBK character set.")) + '("Chinese")) + +;;; Chinese GB18030 + +(define-coding-system 'chinese-gb18030 + "GB18030 encoding for Chinese (MIME:GB18030)." 
+ :coding-type 'charset + :mnemonic ?c + :charset-list '(gb18030) + :mime-charset 'gb18030) + +(define-coding-system-alias 'gb18030 'chinese-gb18030) + +(set-language-info-alist + "Chinese-GB18030" '((charset gb18030) + (coding-system chinese-gb18030) + (coding-priority gb18030 gbk iso-2022-cn chinese-big5 + chinese-iso-8bit) ; fixme? + (input-method . "chinese-py-punct") ; fixme? + (features china-util) + (documentation + . "Support for Chinese GB18030 character set.")) + '("Chinese")) + +;; Fixme: add HKSCS + (provide 'chinese) ;;; chinese.el ends here diff --git a/lisp/language/cyril-util.el b/lisp/language/cyril-util.el index ed9125aa03f..5d53f224a0a 100644 --- a/lisp/language/cyril-util.el +++ b/lisp/language/cyril-util.el @@ -30,19 +30,12 @@ ;;;###autoload (defun cyrillic-encode-koi8-r-char (char) "Return KOI8-R external character code of CHAR if appropriate." - (aref (char-table-extra-slot - (get 'cyrillic-koi8-r-nonascii-translation-table 'translation-table) - 0) - char)) + (encode-char char 'koi8-r)) ;;;###autoload (defun cyrillic-encode-alternativnyj-char (char) "Return ALTERNATIVNYJ external character code of CHAR if appropriate." - (aref (char-table-extra-slot - (get 'cyrillic-alternativnyj-nonascii-translation-table - 'translation-table) - 0) - char)) + (encode-char char 'alternativnyj)) ;; Display @@ -176,117 +169,13 @@ If the argument is nil, we return the display table to its standard state." 
(aset standard-display-table ?,L*(B [?N ?j]) (aset standard-display-table ?,L/(B [?D ?j]) - ;; Unicode version: - (aset standard-display-table ?$,1(P(B [?a]) - (aset standard-display-table ?$,1(Q(B [?b]) - (aset standard-display-table ?$,1(R(B [?v]) - (aset standard-display-table ?$,1(S(B [?g]) - (aset standard-display-table ?$,1(T(B [?d]) - (aset standard-display-table ?$,1(U(B [?e]) - (aset standard-display-table ?$,1(q(B [?y ?o]) - (aset standard-display-table ?$,1(V(B [?z ?h]) - (aset standard-display-table ?$,1(W(B [?z]) - (aset standard-display-table ?$,1(X(B [?i]) - (aset standard-display-table ?$,1(Y(B [?j]) - (aset standard-display-table ?$,1(Z(B [?k]) - (aset standard-display-table ?$,1([(B [?l]) - (aset standard-display-table ?$,1(\(B [?m]) - (aset standard-display-table ?$,1(](B [?n]) - (aset standard-display-table ?$,1(^(B [?o]) - (aset standard-display-table ?$,1(_(B [?p]) - (aset standard-display-table ?$,1(`(B [?r]) - (aset standard-display-table ?$,1(a(B [?s]) - (aset standard-display-table ?$,1(b(B [?t]) - (aset standard-display-table ?$,1(c(B [?u]) - (aset standard-display-table ?$,1(d(B [?f]) - (aset standard-display-table ?$,1(e(B [?k ?h]) - (aset standard-display-table ?$,1(f(B [?t ?s]) - (aset standard-display-table ?$,1(g(B [?c ?h]) - (aset standard-display-table ?$,1(h(B [?s ?h]) - (aset standard-display-table ?$,1(i(B [?s ?c ?h]) - (aset standard-display-table ?$,1(j(B [?~]) - (aset standard-display-table ?$,1(k(B [?y]) - (aset standard-display-table ?$,1(l(B [?']) - (aset standard-display-table ?$,1(m(B [?e ?']) - (aset standard-display-table ?$,1(n(B [?y ?u]) - (aset standard-display-table ?$,1(o(B [?y ?a]) - - (aset standard-display-table ?$,1(0(B [?A]) - (aset standard-display-table ?$,1(1(B [?B]) - (aset standard-display-table ?$,1(2(B [?V]) - (aset standard-display-table ?$,1(3(B [?G]) - (aset standard-display-table ?$,1(4(B [?D]) - (aset standard-display-table ?$,1(5(B [?E]) - (aset standard-display-table ?$,1(!(B [?Y ?o]) - (aset 
standard-display-table ?$,1(6(B [?Z ?h]) - (aset standard-display-table ?$,1(7(B [?Z]) - (aset standard-display-table ?$,1(8(B [?I]) - (aset standard-display-table ?$,1(9(B [?J]) - (aset standard-display-table ?$,1(:(B [?K]) - (aset standard-display-table ?$,1(;(B [?L]) - (aset standard-display-table ?$,1(<(B [?M]) - (aset standard-display-table ?$,1(=(B [?N]) - (aset standard-display-table ?$,1(>(B [?O]) - (aset standard-display-table ?$,1(?(B [?P]) - (aset standard-display-table ?$,1(@(B [?R]) - (aset standard-display-table ?$,1(A(B [?S]) - (aset standard-display-table ?$,1(B(B [?T]) - (aset standard-display-table ?$,1(C(B [?U]) - (aset standard-display-table ?$,1(D(B [?F]) - (aset standard-display-table ?$,1(E(B [?K ?h]) - (aset standard-display-table ?$,1(F(B [?T ?s]) - (aset standard-display-table ?$,1(G(B [?C ?h]) - (aset standard-display-table ?$,1(H(B [?S ?h]) - (aset standard-display-table ?$,1(I(B [?S ?c ?h]) - (aset standard-display-table ?$,1(J(B [?~]) - (aset standard-display-table ?$,1(K(B [?Y]) - (aset standard-display-table ?$,1(L(B [?']) - (aset standard-display-table ?$,1(M(B [?E ?']) - (aset standard-display-table ?$,1(N(B [?Y ?u]) - (aset standard-display-table ?$,1(O(B [?Y ?a]) - - (aset standard-display-table ?$,1(t(B [?i ?e]) - (aset standard-display-table ?$,1(w(B [?i]) - (aset standard-display-table ?$,1(~(B [?u]) - (aset standard-display-table ?$,1(r(B [?d ?j]) - (aset standard-display-table ?$,1({(B [?c ?h ?j]) - (aset standard-display-table ?$,1(s(B [?g ?j]) - (aset standard-display-table ?$,1(u(B [?s]) - (aset standard-display-table ?$,1(|(B [?k]) - (aset standard-display-table ?$,1(v(B [?i]) - (aset standard-display-table ?$,1(x(B [?j]) - (aset standard-display-table ?$,1(y(B [?l ?j]) - (aset standard-display-table ?$,1(z(B [?n ?j]) - (aset standard-display-table ?$,1((B [?d ?z]) - - (aset standard-display-table ?$,1($(B [?Y ?e]) - (aset standard-display-table ?$,1('(B [?Y ?i]) - (aset standard-display-table ?$,1(.(B [?U]) - (aset 
standard-display-table ?$,1("(B [?D ?j]) - (aset standard-display-table ?$,1(+(B [?C ?h ?j]) - (aset standard-display-table ?$,1(#(B [?G ?j]) - (aset standard-display-table ?$,1(%(B [?S]) - (aset standard-display-table ?$,1(,(B [?K]) - (aset standard-display-table ?$,1(&(B [?I]) - (aset standard-display-table ?$,1(((B [?J]) - (aset standard-display-table ?$,1()(B [?L ?j]) - (aset standard-display-table ?$,1(*(B [?N ?j]) - (aset standard-display-table ?$,1(/(B [?D ?j]) - (when (equal cyrillic-language "Bulgarian") (aset standard-display-table ?,Li(B [?s ?h ?t]) (aset standard-display-table ?,LI(B [?S ?h ?t]) (aset standard-display-table ?,Ln(B [?i ?u]) (aset standard-display-table ?,LN(B [?I ?u]) (aset standard-display-table ?,Lo(B [?i ?a]) - (aset standard-display-table ?,LO(B [?I ?a]) - ;; Unicode version: - (aset standard-display-table ?$,1(i(B [?s ?h ?t]) - (aset standard-display-table ?$,1(I(B [?S ?h ?t]) - (aset standard-display-table ?$,1(n(B [?i ?u]) - (aset standard-display-table ?$,1(N(B [?I ?u]) - (aset standard-display-table ?$,1(o(B [?i ?a]) - (aset standard-display-table ?$,1(O(B [?I ?a])) + (aset standard-display-table ?,LO(B [?I ?a])) (when (equal cyrillic-language "Ukrainian") ; based on the official ; transliteration table @@ -295,14 +184,7 @@ If the argument is nil, we return the display table to its standard state." 
(aset standard-display-table ?,LY(B [?i]) (aset standard-display-table ?,L9(B [?Y]) (aset standard-display-table ?,Ln(B [?i ?u]) - (aset standard-display-table ?,Lo(B [?i ?a]) - ;; Unicode version: - (aset standard-display-table ?$,1(X(B [?y]) - (aset standard-display-table ?$,1(8(B [?Y]) - (aset standard-display-table ?$,1(Y(B [?i]) - (aset standard-display-table ?$,1(9(B [?Y]) - (aset standard-display-table ?$,1(n(B [?i ?u]) - (aset standard-display-table ?$,1(o(B [?i ?a])))) + (aset standard-display-table ?,Lo(B [?i ?a])))) ;; (provide 'cyril-util) diff --git a/lisp/language/cyrillic.el b/lisp/language/cyrillic.el index 742da4fc5a0..f95a5427a12 100644 --- a/lisp/language/cyrillic.el +++ b/lisp/language/cyrillic.el @@ -1,8 +1,11 @@ ;;; cyrillic.el --- support for Cyrillic -*- coding: iso-2022-7bit; -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. +;; Licensed to the Free Software Foundation. ;; Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc. +;; Copyright (C) 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 ;; Author: Kenichi Handa <handa@etl.go.jp> ;; Keywords: multilingual, Cyrillic, i18n @@ -54,22 +57,21 @@ ;; ISO-8859-5 stuff -(make-coding-system - 'cyrillic-iso-8bit 2 ?5 - "ISO 2022 based 8-bit encoding for Cyrillic script (MIME:ISO-8859-5)." - '(ascii cyrillic-iso8859-5 nil nil - nil nil nil nil nil nil nil nil nil nil nil t) - '((safe-charsets ascii cyrillic-iso8859-5) - (mime-charset . iso-8859-5))) +(define-coding-system 'cyrillic-iso-8bit + "ISO 2022 based 8-bit encoding for Cyrillic script (MIME:ISO-8859-5)." 
+ :coding-type 'charset + :mnemonic ?5 + :charset-list '(iso-8859-5) + :mime-charset 'iso-8859-5) (define-coding-system-alias 'iso-8859-5 'cyrillic-iso-8bit) (set-language-info-alist - "Cyrillic-ISO" '((charset cyrillic-iso8859-5) + "Cyrillic-ISO" '((charset iso-8859-5) (coding-system cyrillic-iso-8bit) (coding-priority cyrillic-iso-8bit) (input-method . "cyrillic-yawerty") ; fixme - (nonascii-translation . cyrillic-iso8859-5) + (nonascii-translation . iso-8859-5) (unibyte-display . cyrillic-iso-8bit) (features cyril-util) (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!") @@ -78,144 +80,25 @@ ;; KOI-8R stuff -;; The mule-unicode portion of this is from -;; http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT, -;; which references RFC 1489. -(defvar cyrillic-koi8-r-decode-table - [ - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 - 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 - 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 - 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 - 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 - 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 - 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 - ;; 8859-5 plus Unicode - ?$,2 (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B - ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1s"(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,L (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B - ?$,2 p(B ?$,2 q(B ?$,2 r(B ?,Lq(B ?$,2 s(B ?$,2 t(B ?$,2 u(B ?$,2 v(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,2 |(B ?$,2 }(B ?$,2 ~(B - ?$,2 (B ?$,2! 
(B ?$,2!!(B ?,L!(B ?$,2!"(B ?$,2!#(B ?$,2!$(B ?$,2!%(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,2!+(B ?$,2!,(B ?,A)(B - ?,Ln(B ?,LP(B ?,LQ(B ?,Lf(B ?,LT(B ?,LU(B ?,Ld(B ?,LS(B ?,Le(B ?,LX(B ?,LY(B ?,LZ(B ?,L[(B ?,L\(B ?,L](B ?,L^(B - ?,L_(B ?,Lo(B ?,L`(B ?,La(B ?,Lb(B ?,Lc(B ?,LV(B ?,LR(B ?,Ll(B ?,Lk(B ?,LW(B ?,Lh(B ?,Lm(B ?,Li(B ?,Lg(B ?,Lj(B - ?,LN(B ?,L0(B ?,L1(B ?,LF(B ?,L4(B ?,L5(B ?,LD(B ?,L3(B ?,LE(B ?,L8(B ?,L9(B ?,L:(B ?,L;(B ?,L<(B ?,L=(B ?,L>(B - ?,L?(B ?,LO(B ?,L@(B ?,LA(B ?,LB(B ?,LC(B ?,L6(B ?,L2(B ?,LL(B ?,LK(B ?,L7(B ?,LH(B ?,LM(B ?,LI(B ?,LG(B ?,LJ(B - ;; All Unicode: -;; ?$,2 (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B -;; ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1s"(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,A (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B -;; ?$,2 p(B ?$,2 q(B ?$,2 r(B ?$,1(q(B ?$,2 s(B ?$,2 t(B ?$,2 u(B ?$,2 v(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,2 |(B ?$,2 }(B ?$,2 ~(B -;; ?$,2 (B ?$,2! 
(B ?$,2!!(B ?$,1(!(B ?$,2!"(B ?$,2!#(B ?$,2!$(B ?$,2!%(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,2!+(B ?$,2!,(B ?,A)(B -;; ?$,1(n(B ?$,1(P(B ?$,1(Q(B ?$,1(f(B ?$,1(T(B ?$,1(U(B ?$,1(d(B ?$,1(S(B ?$,1(e(B ?$,1(X(B ?$,1(Y(B ?$,1(Z(B ?$,1([(B ?$,1(\(B ?$,1(](B ?$,1(^(B -;; ?$,1(_(B ?$,1(o(B ?$,1(`(B ?$,1(a(B ?$,1(b(B ?$,1(c(B ?$,1(V(B ?$,1(R(B ?$,1(l(B ?$,1(k(B ?$,1(W(B ?$,1(h(B ?$,1(m(B ?$,1(i(B ?$,1(g(B ?$,1(j(B -;; ?$,1(N(B ?$,1(0(B ?$,1(1(B ?$,1(F(B ?$,1(4(B ?$,1(5(B ?$,1(D(B ?$,1(3(B ?$,1(E(B ?$,1(8(B ?$,1(9(B ?$,1(:(B ?$,1(;(B ?$,1(<(B ?$,1(=(B ?$,1(>(B -;; ?$,1(?(B ?$,1(O(B ?$,1(@(B ?$,1(A(B ?$,1(B(B ?$,1(C(B ?$,1(6(B ?$,1(2(B ?$,1(L(B ?$,1(K(B ?$,1(7(B ?$,1(H(B ?$,1(M(B ?$,1(I(B ?$,1(G(B ?$,1(J(B - ] - "Cyrillic KOI8-R decoding table.") - -(let ((table (make-translation-table-from-vector - cyrillic-koi8-r-decode-table))) - (define-translation-table 'cyrillic-koi8-r-nonascii-translation-table table) - (define-translation-table 'cyrillic-koi8-r-encode-table - (char-table-extra-slot table 0))) - -;; No point in keeping it around. (It can't be let-bound, since it's -;; needed for macro expansion.) -(makunbound 'cyrillic-koi8-r-decode-table) - -(define-ccl-program ccl-decode-koi8 - `(4 - ((loop - (r0 = 0) - (read r1) - (if (r1 < 128) - (write-repeat r1) - ((translate-character cyrillic-koi8-r-nonascii-translation-table r0 r1) - (translate-character ucs-translation-table-for-decode r0 r1) - (write-multibyte-character r0 r1) - (repeat)))))) - "CCL program to decode KOI8-R.") - -(define-ccl-program ccl-encode-koi8 - `(1 - ((loop - (read-multibyte-character r0 r1) - (translate-character cyrillic-koi8-r-encode-table r0 r1) - (if (r0 != ,(charset-id 'ascii)) - (if (r0 != ,(charset-id 'eight-bit-graphic)) - (if (r0 != ,(charset-id 'eight-bit-control)) - (r1 = ??)))) - (write-repeat r1)))) - "CCL program to encode KOI8-R.") - -(defun cyrillic-unify-encoding (table) - "Set up equivalent characters in the encoding TABLE. 
-This works whether or not the table is Unicode-based or -8859-5-based. (Only appropriate for Cyrillic.)" - (let ((table (get table 'translation-table))) - (dotimes (i 96) - (let* ((c (make-char 'cyrillic-iso8859-5 (+ i 32))) - (u ; equivalent Unicode char - (cond ((eq c ?,L (B) ?,A (B) - ((eq c ?,L-(B) ?,A-(B) - ((eq c ?,L}(B) ?,A'(B) - (t (decode-char 'ucs (+ #x400 i))))) - (ec (aref table c)) ; encoding of 8859-5 - (uc (aref table u))) ; encoding of Unicode - (unless (memq c '(?,L (B ?,L-(B ?,L}(B)) ; 8859-5 exceptions - (unless uc - (aset table u ec)) - (unless ec - (aset table c uc))))))) - -(cyrillic-unify-encoding 'cyrillic-koi8-r-encode-table) - -(make-coding-system - 'cyrillic-koi8 4 - ;; We used to use ?K. It is true that ?K is more strictly correct, - ;; but it is also used for Korean. - ;; So people who use koi8 for languages other than Russian - ;; will have to forgive us. - ?R "KOI8-R 8-bit encoding for Cyrillic (MIME: KOI8-R)." - '(ccl-decode-koi8 . ccl-encode-koi8) - `((safe-chars . cyrillic-koi8-r-encode-table) - (mime-charset . koi8-r) - (valid-codes (0 . 255)) - (dependency unify-8859-on-encoding-mode unify-8859-on-decoding-mode))) +(define-coding-system 'cyrillic-koi8 + "KOI8 8-bit encoding for Cyrillic (MIME: KOI8-R)." + :coding-type 'charset + ;; We used to use ?K. It is true that ?K is more strictly correct, + ;; but it is also used for Korean. So people who use koi8 for + ;; languages other than Russian will have to forgive us. + :mnemonic ?R + :charset-list '(koi8) + :mime-charset 'koi8-r) (define-coding-system-alias 'koi8-r 'cyrillic-koi8) (define-coding-system-alias 'koi8 'cyrillic-koi8) (define-coding-system-alias 'cp878 'cyrillic-koi8) -;; Allow displaying some of KOI & al with an 8859-5-encoded font. 
We -;; won't bother about the exceptions when encoding the font, since -;; NBSP will fall through below and work anyhow, and we'll have -;; avoided setting the fontset for the other two to 8859-5 -- they're -;; not in KOI and Alternativnyj anyhow. -(define-ccl-program ccl-encode-8859-5-font - `(0 - ((if (r0 == ,(charset-id 'cyrillic-iso8859-5)) - (r1 += 128) - (if (r0 == ,(charset-id 'mule-unicode-0100-24ff)) - (r1 = (r2 + 128)))))) - "Encode ISO 8859-5 and Cyrillic Unicode chars to 8859-5 font.") - -(add-to-list 'font-ccl-encoder-alist '("iso8859-5" . ccl-encode-8859-5-font)) - -;; The table is set up later to encode both Unicode and 8859-5. -(define-ccl-program ccl-encode-koi8-font - `(0 - (translate-character cyrillic-koi8-r-encode-table r0 r1)) - "CCL program to encode Cyrillic chars to KOI font.") - -(add-to-list 'font-ccl-encoder-alist '("koi8" . ccl-encode-koi8-font)) - (set-language-info-alist - "Cyrillic-KOI8" `((charset cyrillic-iso8859-5) - (nonascii-translation - . ,(get 'cyrillic-koi8-r-nonascii-translation-table - 'translation-table)) + "Cyrillic-KOI8" `((charset koi8) (coding-system cyrillic-koi8) (coding-priority cyrillic-koi8 cyrillic-iso-8bit) + (nonascii-translation . koi8) (input-method . "russian-typewriter") (features cyril-util) (unibyte-display . cyrillic-koi8) @@ -239,200 +122,21 @@ Support for Russian using koi8-r and the russian-computer input method.") (tutorial . 
"TUTORIAL.ru")) '("Cyrillic")) - -(defvar cyrillic-koi8-u-decode-table - [ - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 - 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 - 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 - 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 - 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 - 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 - 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 - ;; All Unicode: -;; ?$,2 (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B -;; ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1x9(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,L (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B -;; ?$,2 p(B ?$,2 q(B ?$,2 r(B ?$,1(q(B ?$,1(t(B ?$,2 t(B ?$,1(v(B ?$,1(w(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,1)Q(B ?$,2 }(B ?$,2 ~(B -;; ?$,2 (B ?$,2! (B ?$,2!!(B ?$,1(!(B ?$,1($(B ?$,2!#(B ?$,1(&(B ?$,1('(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,1)P(B ?$,2!,(B ?,A)(B -;; ?$,1(n(B ?$,1(P(B ?$,1(Q(B ?$,1(f(B ?$,1(T(B ?$,1(U(B ?$,1(d(B ?$,1(S(B ?$,1(e(B ?$,1(X(B ?$,1(Y(B ?$,1(Z(B ?$,1([(B ?$,1(\(B ?$,1(](B ?$,1(^(B -;; ?$,1(_(B ?$,1(o(B ?$,1(`(B ?$,1(a(B ?$,1(b(B ?$,1(c(B ?$,1(V(B ?$,1(R(B ?$,1(l(B ?$,1(k(B ?$,1(W(B ?$,1(h(B ?$,1(m(B ?$,1(i(B ?$,1(g(B ?$,1(j(B -;; ?$,1(N(B ?$,1(0(B ?$,1(1(B ?$,1(F(B ?$,1(4(B ?$,1(5(B ?$,1(D(B ?$,1(3(B ?$,1(E(B ?$,1(8(B ?$,1(9(B ?$,1(:(B ?$,1(;(B ?$,1(<(B ?$,1(=(B ?$,1(>(B -;; ?$,1(?(B ?$,1(O(B ?$,1(@(B ?$,1(A(B ?$,1(B(B ?$,1(C(B ?$,1(6(B ?$,1(2(B ?$,1(L(B ?$,1(K(B ?$,1(7(B ?$,1(H(B ?$,1(M(B ?$,1(I(B ?$,1(G(B ?$,1(J(B -;; 8859-5 plus Unicode: - ?$,2 (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B - ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1x9(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,L (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B - ?$,2 p(B 
?$,2 q(B ?$,2 r(B ?,Lq(B ?,Lt(B ?$,2 t(B ?,Lv(B ?,Lw(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,1)Q(B ?$,2 }(B ?$,2 ~(B - ?$,2 (B ?$,2! (B ?$,2!!(B ?,L!(B ?,L$(B ?$,2!#(B ?,L&(B ?,L'(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,1)P(B ?$,2!,(B ?,A)(B - ?,Ln(B ?,LP(B ?,LQ(B ?,Lf(B ?,LT(B ?,LU(B ?,Ld(B ?,LS(B ?,Le(B ?,LX(B ?,LY(B ?,LZ(B ?,L[(B ?,L\(B ?,L](B ?,L^(B - ?,L_(B ?,Lo(B ?,L`(B ?,La(B ?,Lb(B ?,Lc(B ?,LV(B ?,LR(B ?,Ll(B ?,Lk(B ?,LW(B ?,Lh(B ?,Lm(B ?,Li(B ?,Lg(B ?,Lj(B - ?,LN(B ?,L0(B ?,L1(B ?,LF(B ?,L4(B ?,L5(B ?,LD(B ?,L3(B ?,LE(B ?,L8(B ?,L9(B ?,L:(B ?,L;(B ?,L<(B ?,L=(B ?,L>(B - ?,L?(B ?,LO(B ?,L@(B ?,LA(B ?,LB(B ?,LC(B ?,L6(B ?,L2(B ?,LL(B ?,LK(B ?,L7(B ?,LH(B ?,LM(B ?,LI(B ?,LG(B ?,LJ(B - ] - "Cyrillic KOI8-U decoding table.") - -(let ((table (make-translation-table-from-vector - cyrillic-koi8-u-decode-table))) - (define-translation-table 'cyrillic-koi8-u-nonascii-translation-table table) - (define-translation-table 'cyrillic-koi8-u-encode-table - (char-table-extra-slot table 0))) - -(makunbound 'cyrillic-koi8-u-decode-table) - -(define-ccl-program ccl-decode-koi8-u - `(4 - ((loop - (r0 = 0) - (read r1) - (if (r1 < 128) - (write-repeat r1) - ((translate-character cyrillic-koi8-u-nonascii-translation-table r0 r1) - (translate-character ucs-translation-table-for-decode r0 r1) - (write-multibyte-character r0 r1) - (repeat)))))) - "CCL program to decode KOI8-U.") - -(define-ccl-program ccl-encode-koi8-u - `(1 - ((loop - (read-multibyte-character r0 r1) - (translate-character cyrillic-koi8-u-encode-table r0 r1) - (if (r0 != ,(charset-id 'ascii)) - (if (r0 != ,(charset-id 'eight-bit-graphic)) - (if (r0 != ,(charset-id 'eight-bit-control)) - (r1 = ??)))) - (write-repeat r1)))) - "CCL program to encode KOI8-U.") - -(cyrillic-unify-encoding 'cyrillic-koi8-u-encode-table) - -(make-coding-system - 'koi8-u 4 - ?U "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)" - '(ccl-decode-koi8-u . ccl-encode-koi8-u) - `((safe-chars . 
cyrillic-koi8-u-encode-table) - (mime-charset . koi8-u) - (valid-codes (0 . 255)) - (dependency unify-8859-on-encoding-mode unify-8859-on-decoding-mode))) - -(define-ccl-program ccl-encode-koi8-u-font - `(0 - (translate-character cyrillic-koi8-u-encode-table r0 r1)) - "CCL program to encode Cyrillic chars to KOI-U font.") - -(add-to-list 'font-ccl-encoder-alist '("koi8-u" . ccl-encode-koi8-u-font)) - -(set-language-info-alist - "Ukrainian" `((coding-system koi8-u) - (coding-priority koi8-u) - (nonascii-translation - . ,(get 'cyrillic-koi8-u-nonascii-translation-table - 'translation-table)) - (input-method . "ukrainian-computer") - (features code-pages) - (documentation - . "Support for Ukrainian with KOI8-U character set.")) - '("Cyrillic")) - ;;; ALTERNATIVNYJ stuff -;; Fixme: It's unclear what's the correct table. I've found -;; statements both that it's the same as cp866 and somewhat different, -;; but nothing that looks really definitive. -(defvar cyrillic-alternativnyj-decode-table - [ - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 - 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 - 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 - 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 - 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 - 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 - 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 -;; ?$,1(0(B ?$,1(1(B ?$,1(2(B ?$,1(3(B ?$,1(4(B ?$,1(5(B ?$,1(6(B ?$,1(7(B ?$,1(8(B ?$,1(9(B ?$,1(:(B ?$,1(;(B ?$,1(<(B ?$,1(=(B ?$,1(>(B ?$,1(?(B -;; ?$,1(@(B ?$,1(A(B ?$,1(B(B ?$,1(C(B ?$,1(D(B ?$,1(E(B ?$,1(F(B ?$,1(G(B ?$,1(H(B ?$,1(I(B ?$,1(J(B ?$,1(K(B ?$,1(L(B ?$,1(M(B ?$,1(N(B ?$,1(O(B -;; ?$,1(P(B ?$,1(Q(B ?$,1(R(B ?$,1(S(B ?$,1(T(B ?$,1(U(B ?$,1(V(B ?$,1(W(B ?$,1(X(B ?$,1(Y(B ?$,1(Z(B ?$,1([(B ?$,1(\(B ?$,1(](B ?$,1(^(B ?$,1(_(B -;; ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,2 "(B ?$,2 D(B ?$,2!!(B ?$,2!"(B ?$,2 v(B ?$,2 u(B ?$,2!#(B ?$,2 q(B ?$,2 w(B ?$,2 }(B ?$,2 |(B 
?$,2 {(B ?$,2 0(B -;; ?$,2 4(B ?$,2 T(B ?$,2 L(B ?$,2 <(B ?$,2 (B ?$,2 \(B ?$,2 ~(B ?$,2 (B ?$,2 z(B ?$,2 t(B ?$,2!)(B ?$,2!&(B ?$,2! (B ?$,2 p(B ?$,2!,(B ?$,2!'(B -;; ?$,2!((B ?$,2!$(B ?$,2!%(B ?$,2 y(B ?$,2 x(B ?$,2 r(B ?$,2 s(B ?$,2!+(B ?$,2!*(B ?$,2 8(B ?$,2 ,(B ?$,2!H(B ?$,2!D(B ?$,2!L(B ?$,2!P(B ?$,2!@(B -;; ?$,1(`(B ?$,1(a(B ?$,1(b(B ?$,1(c(B ?$,1(d(B ?$,1(e(B ?$,1(f(B ?$,1(g(B ?$,1(h(B ?$,1(i(B ?$,1(j(B ?$,1(k(B ?$,1(l(B ?$,1(m(B ?$,1(n(B ?$,1(o(B -;; ?$,1(!(B ?$,1(q(B ?$,1ry(B ?$,1rx(B ?$,1%A(B ?$,1%@(B ?$,1s:(B ?$,1s9(B ?$,1vq(B ?$,1vs(B ?,A1(B ?,Aw(B ?$,1uV(B ?,A$(B ?$,2!`(B ?,A (B ; -;; 8859+Unicode - ?,L0(B ?,L1(B ?,L2(B ?,L3(B ?,L4(B ?,L5(B ?,L6(B ?,L7(B ?,L8(B ?,L9(B ?,L:(B ?,L;(B ?,L<(B ?,L=(B ?,L>(B ?,L?(B - ?,L@(B ?,LA(B ?,LB(B ?,LC(B ?,LD(B ?,LE(B ?,LF(B ?,LG(B ?,LH(B ?,LI(B ?,LJ(B ?,LK(B ?,LL(B ?,LM(B ?,LN(B ?,LO(B - ?,LP(B ?,LQ(B ?,LR(B ?,LS(B ?,LT(B ?,LU(B ?,LV(B ?,LW(B ?,LX(B ?,LY(B ?,LZ(B ?,L[(B ?,L\(B ?,L](B ?,L^(B ?,L_(B - ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,2 "(B ?$,2 D(B ?$,2!!(B ?$,2!"(B ?$,2 v(B ?$,2 u(B ?$,2!#(B ?$,2 q(B ?$,2 w(B ?$,2 }(B ?$,2 |(B ?$,2 {(B ?$,2 0(B - ?$,2 4(B ?$,2 T(B ?$,2 L(B ?$,2 <(B ?$,2 (B ?$,2 \(B ?$,2 ~(B ?$,2 (B ?$,2 z(B ?$,2 t(B ?$,2!)(B ?$,2!&(B ?$,2! (B ?$,2 p(B ?$,2!,(B ?$,2!'(B - ?$,2!((B ?$,2!$(B ?$,2!%(B ?$,2 y(B ?$,2 x(B ?$,2 r(B ?$,2 s(B ?$,2!+(B ?$,2!*(B ?$,2 8(B ?$,2 ,(B ?$,2!H(B ?$,2!D(B ?$,2!L(B ?$,2!P(B ?$,2!@(B - ?,L`(B ?,La(B ?,Lb(B ?,Lc(B ?,Ld(B ?,Le(B ?,Lf(B ?,Lg(B ?,Lh(B ?,Li(B ?,Lj(B ?,Lk(B ?,Ll(B ?,Lm(B ?,Ln(B ?,Lo(B - ;; Taken from http://www.cyrillic.com/ref/cyrillic/koi-8alt.html - ;; with guesses for the Unicodes of the glyphs in the absence of a - ;; table. 
- ?,L!(B ?,Lq(B ?$,1ry(B ?$,1rx(B ?$,1%A(B ?$,1%@(B ?$,1s:(B ?$,1s9(B ?$,1vq(B ?$,1vs(B ?,A1(B ?,Aw(B ?,Lp(B ?,A$(B ?$,2!`(B ?,L (B] - "Cyrillic ALTERNATIVNYJ decoding table.") - -(let ((table (make-translation-table-from-vector - cyrillic-alternativnyj-decode-table))) - (define-translation-table 'cyrillic-alternativnyj-nonascii-translation-table - table) - (define-translation-table 'cyrillic-alternativnyj-encode-table - (char-table-extra-slot table 0))) - -(makunbound 'cyrillic-alternativnyj-decode-table) - -(define-ccl-program ccl-decode-alternativnyj - `(4 - ((loop - (r0 = 0) - (read r1) - (if (r1 < 128) - (write-repeat r1) - ((translate-character cyrillic-alternativnyj-nonascii-translation-table - r0 r1) - (translate-character ucs-translation-table-for-decode r0 r1) - (write-multibyte-character r0 r1) - (repeat)))))) - "CCL program to decode Alternativnyj.") - -(define-ccl-program ccl-encode-alternativnyj - `(1 - ((loop - (read-multibyte-character r0 r1) - (translate-character cyrillic-alternativnyj-encode-table r0 r1) - (if (r0 != ,(charset-id 'ascii)) - (if (r0 != ,(charset-id 'eight-bit-graphic)) - (if (r0 != ,(charset-id 'eight-bit-control)) - (r1 = ??)))) - (write-repeat r1)))) - "CCL program to encode Alternativnyj.") - -(cyrillic-unify-encoding 'cyrillic-alternativnyj-encode-table) - -(make-coding-system - 'cyrillic-alternativnyj 4 ?A - "ALTERNATIVNYJ 8-bit encoding for Cyrillic." - '(ccl-decode-alternativnyj . ccl-encode-alternativnyj) - `((safe-chars . cyrillic-alternativnyj-encode-table) - (valid-codes (0 . 255)) - (dependency unify-8859-on-encoding-mode unify-8859-on-decoding-mode))) +(define-coding-system 'cyrillic-alternativnyj + "ALTERNATIVNYJ 8-bit encoding for Cyrillic." 
+ :coding-type 'charset + :mnemonic ?A + :charset-list '(alternativnyj)) (define-coding-system-alias 'alternativnyj 'cyrillic-alternativnyj) -(define-ccl-program ccl-encode-alternativnyj-font - `(0 - (translate-character cyrillic-alternativnyj-encode-table r0 r1)) - "CCL program to encode Cyrillic chars to Alternativnyj font.") - -(add-to-list 'font-ccl-encoder-alist - '("alternativnyj" . ccl-encode-alternativnyj-font)) - (set-language-info-alist - "Cyrillic-ALT" `((charset cyrillic-iso8859-5) - (nonascii-translation - . ,(get 'cyrillic-alternativnyj-nonascii-translation-table - 'translation-table)) + "Cyrillic-ALT" `((charset alternativnyj) (coding-system cyrillic-alternativnyj) (coding-priority cyrillic-alternativnyj) + (nonascii-translation . alternativnyj) (input-method . "russian-typewriter") (features cyril-util) (unibyte-display . cyrillic-alternativnyj) @@ -440,21 +144,77 @@ Support for Russian using koi8-r and the russian-computer input method.") (documentation . "Support for Cyrillic ALTERNATIVNYJ.")) '("Cyrillic")) -(set-language-info-alist - "Windows-1251" `((coding-system windows-1251) - (coding-priority windows-1251) - (nonascii-translation - . ,(get 'decode-windows-1252 'translation-table)) - (input-method . "russian-typewriter") ; fixme? - (features code-pages) - (documentation . "Support for windows-1251 character set.")) - '("Cyrillic")) +(define-coding-system 'cp866 + "CP866 encoding for Cyrillic." 
+ :coding-type 'charset + :mnemonic ?* + :charset-list '(ibm866) + :mime-charset 'cp866) + +(define-coding-system 'koi8-u + "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)" + :coding-type 'charset + :mnemonic ?U + :charset-list '(koi8-u) + :mime-charset 'koi8-u) + +(define-coding-system 'koi8-t + "KOI8-T 8-bit encoding for Cyrillic" + :coding-type 'charset + :mnemonic ?* + :charset-list '(koi8-t) + :mime-charset 'koi8-t) + +(define-coding-system 'windows-1251 + "windows-1251 8-bit encoding for Cyrillic (MIME: WINDOWS-1251)" + :coding-type 'charset + :mnemonic ?b + :charset-list '(windows-1251) + :mime-charset 'windows-1251) +(define-coding-system-alias 'cp1251 'windows-1251) + +(define-coding-system 'cp1125 + "cp1125 8-bit encoding for Cyrillic" + :coding-type 'charset + :mnemonic ?* + :charset-list '(cp1125)) +(define-coding-system-alias 'ruscii 'cp1125) +;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua> +(define-coding-system-alias 'cp866u 'cp1125) + +(define-coding-system 'cp855 + "DOS codepage 855 (Russian)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp855) + :mime-charset 'cp855) +(define-coding-system-alias 'ibm855 'cp855) + +(define-coding-system 'mik + "Bulgarian DOS codepage" + :coding-type 'charset + :mnemonic ?D + :charset-list '(mik)) + +(define-coding-system 'pt154 + "Parattype Asian Cyrillic codepage" + :coding-type 'charset + :mnemonic ?D + :charset-list '(pt154)) + +;; (set-language-info-alist +;; "Windows-1251" `((coding-system windows-1251) +;; (coding-priority windows-1251) +;; (input-method . "russian-typewriter") ; fixme? +;; (features code-pages) +;; (documentation . "Support for windows-1251 character set.")) +;; '("Cyrillic")) (set-language-info-alist - "Tajik" `((coding-system cyrillic-koi8-t) - (coding-priority cyrillic-koi8-t) - (nonascii-translation - . ,(get 'decode-koi8-t 'translation-table)) + "Tajik" `((coding-system koi8-t) + (coding-priority koi8-t) + (nonascii-translation . 
cyrillic-koi8-t) + (charset koi8-t) (input-method . "russian-typewriter") ; fixme? (features code-pages) (documentation . "Support for Tajik using KOI8-T.")) @@ -462,21 +222,20 @@ Support for Russian using koi8-r and the russian-computer input method.") (set-language-info-alist "Bulgarian" `((coding-system windows-1251) - (coding-priority windows-1251) - (nonascii-translation - . ,(get 'decode-windows-1251 'translation-table)) + (coding-priority windows-1251) + (nonascii-translation . windows-1251) + (charset windows-1251) (input-method . "bulgarian-bds") - (features code-pages) + (features code-pages) (documentation - . "Support for Bulgarian with windows-1251 character set.") - (tutorial . "TUTORIAL.bg")) + . "Support for Bulgarian with windows-1251 character set.")) '("Cyrillic")) (set-language-info-alist "Belarusian" `((coding-system windows-1251) (coding-priority windows-1251) - (nonascii-translation - . ,(get 'decode-windows-1251 'translation-table)) + (nonascii-translation . windows-1251) + (charset windows-1251) (input-method . "belarusian") (features code-pages) (documentation @@ -484,6 +243,14 @@ Support for Russian using koi8-r and the russian-computer input method.") \(The name Belarusian replaced Byelorussian in the early 1990s.)")) '("Cyrillic")) +(set-language-info-alist + "Ukrainian" `((coding-system koi8-u) + (coding-priority koi8-u) + (input-method . "ukrainian-computer") + (documentation + . "Support for Ukrainian with koi8-u character set.")) + '("Cyrillic")) + (provide 'cyrillic) ;;; cyrillic.el ends here diff --git a/lisp/language/czech.el b/lisp/language/czech.el index cba9673596c..0d3cb342d45 100644 --- a/lisp/language/czech.el +++ b/lisp/language/czech.el @@ -34,8 +34,7 @@ (coding-system . (iso-8859-2)) (coding-priority . (iso-8859-2)) (input-method . "czech") - (nonascii-translation . latin-iso8859-2) - (unibyte-syntax . "latin-2") + (nonascii-translation . iso-8859-2) (unibyte-display . iso-8859-2) (tutorial . "TUTORIAL.cs") (sample-text . 
"P,Bx(Bejeme v,Ba(Bm hezk,B}(B den!") diff --git a/lisp/language/devan-util.el b/lisp/language/devan-util.el index ccbaf36e64c..4e3fbc9a257 100644 --- a/lisp/language/devan-util.el +++ b/lisp/language/devan-util.el @@ -1,4 +1,4 @@ -;;; devan-util.el --- Support for composing Devanagari characters +;;; devan-util.el --- Support for composing Devanagari characters -*-coding: iso-2022-7bit;-*- ;; Copyright (C) 2001 Free Software Foundation, Inc. @@ -31,8 +31,6 @@ ;;; Code: -;;;###autoload - ;; Devanagari Composable Pattern ;; C .. Consonants ;; V .. Vowel @@ -51,6 +49,11 @@ (defconst devanagari-consonant "[$,15U(B-$,15y68(B-$,16?(B]") + ;;("$,16B(B" . nil) + ;;("$,16A(B" . nil) + ;;("$,16C(B" . nil) + + (defconst devanagari-composable-pattern (concat "\\([$,15E(B-$,15T6@6A(B][$,15A5B(B]?\\)\\|[$,15C6D(B]" @@ -60,6 +63,13 @@ "\\)") "Regexp matching a composable sequence of Devanagari characters.") +(dolist (range '((#x0903 . #x0903) + (#x0905 . #x0939) + (#x0958 . #x0961))) + (set-char-table-range indian-composable-pattern range + devanagari-composable-pattern)) + +;;;###autoload (defun devanagari-compose-region (from to) (interactive "r") (save-excursion @@ -67,8 +77,8 @@ (narrow-to-region from to) (goto-char (point-min)) (while (re-search-forward devanagari-composable-pattern nil t) - (devanagari-compose-syllable-region (match-beginning 0) - (match-end 0)))))) + (devanagari-compose-syllable-region (match-beginning 0) + (match-end 0)))))) (defun devanagari-compose-string (string) (with-temp-buffer (insert (decompose-string string)) @@ -84,11 +94,6 @@ (set-buffer-modified-p buffer-modified-p) (- (point-max) (point-min)))))) -(defun devanagari-range (from to) - "Make the list of the integers of range FROM to TO." - (let (result) - (while (<= from to) (setq result (cons to result) to (1- to))) result)) - (defun devanagari-regexp-of-hashtbl-keys (hashtbl) "Return a regular expression that matches all keys in hashtable HASHTBL." 
(let ((max-specpdl-size 1000)) @@ -99,21 +104,18 @@ dummy) (function (lambda (x y) (> (length x) (length y)))))))) -(defun devanagari-composition-function (from to pattern &optional string) - "Compose Devanagari characters in REGION, or STRING if specified. -Assume that the REGION or STRING must fully match the composable -PATTERN regexp." - (if string (devanagari-compose-syllable-string string) - (devanagari-compose-syllable-region from to)) - (- to from)) - -;; Register a function to compose Devanagari characters. -(mapc - (function (lambda (ucs) - (aset composition-function-table (decode-char 'ucs ucs) - (list (cons devanagari-composable-pattern - 'devanagari-composition-function))))) - (nconc '(#x0903) (devanagari-range #x0905 #x0939) (devanagari-range #x0958 #x0961))) +;;;###autoload +(defun devanagari-composition-function (pos &optional string) + "Compose Devanagari characters after the position POS. +If STRING is not nil, it is a string, and POS is an index to the string. +In this case, compose characters after POS of the string." + (if string + ;; Not yet implemented. + nil + (goto-char pos) + (if (looking-at devanagari-composable-pattern) + (prog1 (match-end 0) + (devanagari-compose-syllable-region pos (match-end 0)))))) ;; Notes on conversion steps. @@ -490,11 +492,10 @@ preferred rule from the sanskrit fonts." ) (defvar dev-glyph-glyph-2-regexp (devanagari-regexp-of-hashtbl-keys dev-glyph-glyph-2-hash)) - (defun dev-charseq (from &optional to) (if (null to) (setq to from)) - (mapcar (function (lambda (x) (indian-glyph-char x 'devanagari))) - (devanagari-range from to))) + (number-sequence (decode-char 'devanagari-cdac from) + (decode-char 'devanagari-cdac to))) (defvar dev-glyph-cvn (append @@ -564,84 +565,89 @@ preferred rule from the sanskrit fonts." ) (defun devanagari-compose-syllable-region (from to) "Compose devanagari syllable in region FROM to TO." 
(let ((glyph-str nil) (cons-num 0) glyph-str-list - (last-halant nil) (preceding-r nil) (last-modifier nil) - (last-char (char-before to)) match-str - glyph-block split-pos) + (last-halant nil) (preceding-r nil) (last-modifier nil) + (last-char (char-before to)) match-str + glyph-block split-pos) (save-excursion (save-restriction - ;;; *** char-to-glyph conversion *** - ;; Special rule 1. -- Last halant must be preserved. - (if (eq last-char ?$,16-(B) - (progn - (setq last-halant t) - (narrow-to-region from (1- to))) - (narrow-to-region from to) - ;; note if the last char is modifier. - (if (or (eq last-char ?$,15A(B) (eq last-char ?$,15B(B)) - (setq last-modifier t))) - (goto-char (point-min)) - ;; Special rule 2. -- preceding "r halant" must be modifier. - (when (looking-at "$,15p6-(B.") - (setq preceding-r t) - (goto-char (+ 2 (point)))) - ;; translate the rest characters into glyphs - (while (re-search-forward dev-char-glyph-regexp nil t) - (setq match-str (match-string 0)) - (setq glyph-str - (concat glyph-str - (gethash match-str dev-char-glyph-hash))) - ;; count the number of consonant-glyhs. - (if (string-match devanagari-consonant match-str) - (setq cons-num (1+ cons-num)))) - ;; preceding-r must be attached before the anuswar if exists. 
- (if preceding-r - (if last-modifier - (setq glyph-str (concat (substring glyph-str 0 -1) - "$,4"'(B" (substring glyph-str -1))) - (setq glyph-str (concat glyph-str "$,4"'(B")))) - (if last-halant (setq glyph-str (concat glyph-str "$,4""(B"))) - ;;; *** glyph-to-glyph conversion *** - (when (string-match dev-glyph-glyph-regexp glyph-str) - (setq glyph-str - (replace-match (gethash (match-string 0 glyph-str) - dev-glyph-glyph-hash) - nil t glyph-str)) - (if (and (> cons-num 1) - (string-match dev-glyph-glyph-2-regexp glyph-str)) - (setq glyph-str - (replace-match (gethash (match-string 0 glyph-str) - dev-glyph-glyph-2-hash) - nil t glyph-str)))) - ;;; *** glyph reordering *** - (while (setq split-pos (string-match "$,4""(B\\|.$" glyph-str)) - (setq glyph-block (substring glyph-str 0 (1+ split-pos))) - (setq glyph-str (substring glyph-str (1+ split-pos))) - (setq - glyph-block - (if (string-match dev-glyph-right-modifier-regexp glyph-block) - (sort (string-to-list glyph-block) - (function (lambda (x y) - (< (get-char-code-property x 'composition-order) - (get-char-code-property y 'composition-order))))) - (sort (string-to-list glyph-block) - (function (lambda (x y) - (let ((xo (get-char-code-property x 'composition-order)) - (yo (get-char-code-property y 'composition-order))) - (if (= xo 2) nil (if (= yo 2) t (< xo yo))))))))) - (setq glyph-str-list (nconc glyph-str-list glyph-block))) - ;; concatenate and attach reference-points. - (setq glyph-str - (cdr - (apply - 'nconc - (mapcar - (function (lambda (x) - (list - (or (get-char-code-property x 'reference-point) - '(5 . 3) ;; default reference point. - ) - x))) - glyph-str-list)))))) + ;;; *** char-to-glyph conversion *** + ;; Special rule 1. -- Last halant must be preserved. + (if (eq last-char ?$,16-(B) + (progn + (setq last-halant t) + (narrow-to-region from (1- to))) + (narrow-to-region from to) + ;; note if the last char is modifier. 
+ (if (or (eq last-char ?$,15A(B) (eq last-char ?$,15B(B)) + (setq last-modifier t))) + (goto-char (point-min)) + ;; Special rule 2. -- preceding "r halant" must be modifier. + (when (looking-at "$,15p6-(B.") + (setq preceding-r t) + (goto-char (+ 2 (point)))) + ;; translate the rest characters into glyphs + (while (not (eobp)) + (if (looking-at dev-char-glyph-regexp) + (let ((end (match-end 0))) + (setq match-str (match-string 0) + glyph-str + (concat glyph-str + (gethash match-str dev-char-glyph-hash))) + ;; count the number of consonant-glyhs. + (if (string-match devanagari-consonant match-str) + (setq cons-num (1+ cons-num))) + (goto-char end)) + (setq glyph-str (concat glyph-str (string (following-char)))) + (forward-char 1))) + ;; preceding-r must be attached before the anuswar if exists. + (if preceding-r + (if last-modifier + (setq glyph-str (concat (substring glyph-str 0 -1) + "$,4"'(B" (substring glyph-str -1))) + (setq glyph-str (concat glyph-str "$,4"'(B")))) + (if last-halant (setq glyph-str (concat glyph-str "$,4""(B"))) + ;;; *** glyph-to-glyph conversion *** + (when (string-match dev-glyph-glyph-regexp glyph-str) + (setq glyph-str + (replace-match (gethash (match-string 0 glyph-str) + dev-glyph-glyph-hash) + nil t glyph-str)) + (if (and (> cons-num 1) + (string-match dev-glyph-glyph-2-regexp glyph-str)) + (setq glyph-str + (replace-match (gethash (match-string 0 glyph-str) + dev-glyph-glyph-2-hash) + nil t glyph-str)))) + ;;; *** glyph reordering *** + (while (setq split-pos (string-match "$,4""(B\\|.$" glyph-str)) + (setq glyph-block (substring glyph-str 0 (1+ split-pos))) + (setq glyph-str (substring glyph-str (1+ split-pos))) + (setq + glyph-block + (if (string-match dev-glyph-right-modifier-regexp glyph-block) + (sort (string-to-list glyph-block) + (function (lambda (x y) + (< (get-char-code-property x 'composition-order) + (get-char-code-property y 'composition-order))))) + (sort (string-to-list glyph-block) + (function (lambda (x y) + (let 
((xo (get-char-code-property x 'composition-order)) + (yo (get-char-code-property y 'composition-order))) + (if (= xo 2) nil (if (= yo 2) t (< xo yo))))))))) + (setq glyph-str-list (nconc glyph-str-list glyph-block))) + ;; concatenate and attach reference-points. + (setq glyph-str + (cdr + (apply + 'nconc + (mapcar + (function (lambda (x) + (list + (or (get-char-code-property x 'reference-point) + '(5 . 3) ;; default reference point. + ) + x))) + glyph-str-list)))))) (compose-region from to glyph-str))) (provide 'devan-util) diff --git a/lisp/language/devanagari.el b/lisp/language/devanagari.el index 4360c0c6df3..94f11b403d4 100644 --- a/lisp/language/devanagari.el +++ b/lisp/language/devanagari.el @@ -33,8 +33,8 @@ "Devanagari" '((charset indian-is13194 mule-unicode-0100-24ff indian-2-column indian-glyph ;; comment out later ) - (coding-system in-is13194) - (coding-priority in-is13194) + (coding-system in-is13194-devanagari) + (coding-priority in-is13194-devanagari) (input-method . "dev-aiba") (features devan-util) (documentation . "\ @@ -42,6 +42,13 @@ Such languages using Devanagari script as Hindi and Marathi are supported in this language environment.")) '("Indian")) +;; For automatic composition. +(dolist (range '((#x0903 . #x0903) + (#x0905 . #x0939) + (#x0958 . #x0961))) + (set-char-table-range composition-function-table range + 'devanagari-composition-function)) + (provide 'devanagari) ;;; devanagari.el ends here diff --git a/lisp/language/english.el b/lisp/language/english.el index af7dbafdb71..342dea6da98 100644 --- a/lisp/language/english.el +++ b/lisp/language/english.el @@ -1,7 +1,10 @@ ;;; english.el --- support for English -*- no-byte-compile: t -*- ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. +;; Licensed to the Free Software Foundation. 
+;; Copyright (C) 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 ;; Keywords: multibyte character, character set, syntax, category @@ -38,6 +41,26 @@ Nothing special is needed to handle English.") )) +;; Mostly because we can now... +(define-coding-system 'ebcdic-us + "US version of EBCDIC" + :coding-type 'charset + :charset-list '(ebcdic-us) + :mnemonic ?*) + +(define-coding-system 'ebcdic-uk + "UK version of EBCDIC" + :coding-type 'charset + :charset-list '(ebcdic-uk) + :mnemonic ?*) + +(define-coding-system 'ibm1047 + "A version of EBCDIC used in OS/390 Unix" ; says Groff + :coding-type 'charset + :charset-list '(ibm1047) + :mnemonic ?*) +(define-coding-system-alias 'cp1047 'ibm1047) + ;; Make "ASCII" an alias of "English" language environment. (set-language-info-alist "ASCII" (cdr (assoc "English" language-info-alist))) diff --git a/lisp/language/european.el b/lisp/language/european.el index 7c8d728523c..e56c5f49df4 100644 --- a/lisp/language/european.el +++ b/lisp/language/european.el @@ -1,8 +1,11 @@ ;;; european.el --- support for European languages -*- coding: iso-2022-7bit; -*- ;; Copyright (C) 1995, 1997, 2001 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. +;; Licensed to the Free Software Foundation. ;; Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc. +;; Copyright (C) 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 ;; Keywords: multilingual, European @@ -25,19 +28,19 @@ ;;; Commentary: -;; For European scripts, character sets ISO8859-1,2,3,4,9,14,15 are -;; supported. +;; For European scripts, character sets ISO8859-1,2,3,4,9,10,13,14,15, +;; windows-1250,2,4,7, mac-roman, adobe-standard-encoding, cp850 and +;; next are supported. 
;;; Code: ;; Latin-1 (ISO-8859-1) (set-language-info-alist - "Latin-1" '((charset ascii latin-iso8859-1) - (coding-system iso-latin-1) + "Latin-1" '((charset iso-8859-1) + (coding-system iso-latin-1 iso-latin-9 windows-1252) (coding-priority iso-latin-1) - (nonascii-translation . latin-iso8859-1) - (unibyte-syntax . "latin-1") + (nonascii-translation . iso-8859-1) (unibyte-display . iso-latin-1) (input-method . "latin-1-prefix") (sample-text @@ -65,23 +68,21 @@ Indonesian/Malay, Tagalog (Philippines), Swahili and Afrikaans.")) ;; Latin-2 (ISO-8859-2) -(make-coding-system - 'iso-latin-2 2 ?2 - "ISO 2022 based 8-bit encoding for Latin-2 (MIME:ISO-8859-2)." - '(ascii latin-iso8859-2 nil nil - nil nil nil nil nil nil nil nil nil nil nil t) - '((safe-charsets ascii latin-iso8859-2) - (mime-charset . iso-8859-2))) +(define-coding-system 'iso-latin-2 + "ISO 2022 based 8-bit encoding for Latin-2 (MIME:ISO-8859-2)." + :coding-type 'charset + :mnemonic ?2 + :charset-list '(iso-8859-2) + :mime-charset 'iso-8859-2) (define-coding-system-alias 'iso-8859-2 'iso-latin-2) (define-coding-system-alias 'latin-2 'iso-latin-2) (set-language-info-alist - "Latin-2" '((charset ascii latin-iso8859-2) - (coding-system iso-latin-2) + "Latin-2" '((charset iso-8859-2) + (coding-system iso-latin-2 windows-1250) (coding-priority iso-latin-2) - (nonascii-translation . latin-iso8859-2) - (unibyte-syntax . "latin-2") + (nonascii-translation . iso-8859-2) (unibyte-display . iso-latin-2) (input-method . "latin-2-prefix") (documentation . "\ @@ -93,6 +94,7 @@ character set which supports the following languages: We also have specific language environments for the following languages: For Czech, \"Czech\". For Croatian, \"Croatian\". + For Polish, \"Polish\". For Romanian, \"Romanian\". 
For Slovak, \"Slovak\".")) '("European")) @@ -100,23 +102,21 @@ We also have specific language environments for the following languages: ;; Latin-3 (ISO-8859-3) -(make-coding-system - 'iso-latin-3 2 ?3 - "ISO 2022 based 8-bit encoding for Latin-3 (MIME:ISO-8859-3)." - '(ascii latin-iso8859-3 nil nil - nil nil nil nil nil nil nil nil nil nil nil t) - '((safe-charsets ascii latin-iso8859-3) - (mime-charset . iso-8859-3))) +(define-coding-system 'iso-latin-3 + "ISO 2022 based 8-bit encoding for Latin-3 (MIME:ISO-8859-3)." + :coding-type 'charset + :mnemonic ?3 + :charset-list '(iso-8859-3) + :mime-charset 'iso-8859-3) (define-coding-system-alias 'iso-8859-3 'iso-latin-3) (define-coding-system-alias 'latin-3 'iso-latin-3) (set-language-info-alist - "Latin-3" '((charset ascii latin-iso8859-3) + "Latin-3" '((charset iso-8859-3) (coding-system iso-latin-3) (coding-priority iso-latin-3) - (nonascii-translation . latin-iso8859-3) - (unibyte-syntax . "latin-3") + (nonascii-translation . iso-8859-3) (unibyte-display . iso-latin-3) (input-method . "latin-3-prefix") (documentation . "\ @@ -128,23 +128,21 @@ These languages are supported with the Latin-3 (ISO-8859-3) character set: ;; Latin-4 (ISO-8859-4) -(make-coding-system - 'iso-latin-4 2 ?4 - "ISO 2022 based 8-bit encoding for Latin-4 (MIME:ISO-8859-4)." - '(ascii latin-iso8859-4 nil nil - nil nil nil nil nil nil nil nil nil nil nil t) - '((safe-charsets ascii latin-iso8859-4) - (mime-charset . iso-8859-4))) +(define-coding-system 'iso-latin-4 + "ISO 2022 based 8-bit encoding for Latin-4 (MIME:ISO-8859-4)." + :coding-type 'charset + :mnemonic ?4 + :charset-list '(iso-8859-4) + :mime-charset 'iso-8859-4) (define-coding-system-alias 'iso-8859-4 'iso-latin-4) (define-coding-system-alias 'latin-4 'iso-latin-4) (set-language-info-alist - "Latin-4" '((charset ascii latin-iso8859-4) + "Latin-4" '((charset iso-8859-4) (coding-system iso-8859-4) (coding-priority iso-8859-4) - (nonascii-translation . 
latin-iso8859-4) - (unibyte-syntax . "latin-4") + (nonascii-translation . iso-8859-4) (unibyte-display . iso-8859-4) (input-method . "latin-4-postfix") (documentation . "\ @@ -156,49 +154,91 @@ These languages are supported with the Latin-4 (ISO-8859-4) character set: ;; Latin-5 (ISO-8859-9) -(make-coding-system - 'iso-latin-5 2 ?9 - "ISO 2022 based 8-bit encoding for Latin-5 (MIME:ISO-8859-9)." - '(ascii latin-iso8859-9 nil nil - nil nil nil nil nil nil nil nil nil nil nil t) - '((safe-charsets ascii latin-iso8859-9) - (mime-charset . iso-8859-9))) +(define-coding-system 'iso-latin-5 + "ISO 2022 based 8-bit encoding for Latin-5 (MIME:ISO-8859-9)." + :coding-type 'charset + :mnemonic ?9 + :charset-list '(iso-8859-9) + :mime-charset 'iso-8859-9) (define-coding-system-alias 'iso-8859-9 'iso-latin-5) (define-coding-system-alias 'latin-5 'iso-latin-5) (set-language-info-alist - "Latin-5" '((charset ascii latin-iso8859-9) + "Latin-5" '((charset iso-8859-9) (coding-system iso-latin-5) (coding-priority iso-latin-5) - (nonascii-translation . latin-iso8859-9) - (unibyte-syntax . "latin-5") + (nonascii-translation . iso-8859-9) (unibyte-display . iso-latin-5) (input-method . "latin-5-postfix") - (documentation . "Support for Turkish language.")) + (documentation . "Support for Latin-5.\ +See also the Turkish environment.")) '("European")) +;; Latin-6 (ISO-8859-10) + +(define-coding-system 'iso-latin-6 + "ISO 2022 based 8-bit encoding for Latin-6 (MIME:ISO-8859-10)." + :coding-type 'charset + :mnemonic ?9 + :charset-list '(iso-8859-10) + :mime-charset 'iso-8859-10) + +(define-coding-system-alias 'iso-8859-10 'iso-latin-6) +(define-coding-system-alias 'latin-6 'iso-latin-6) + +(set-language-info-alist + "Latin-6" '((charset iso-8859-10) + (coding-system iso-latin-6) + (coding-priority iso-latin-6) + (nonascii-translation . iso-8859-10) + (unibyte-display . iso-latin-6) + ;; Fixme: input method. + (documentation . 
"Support for generic Latin-6 (Northern European).")) + '("European")) + + +;; Latin-7 (ISO-8859-13) + +(define-coding-system 'iso-latin-7 + "ISO 2022 based 8-bit encoding for Latin-7 (MIME:ISO-8859-13)." + :coding-type 'charset + :mnemonic ?9 + :charset-list '(iso-8859-13) + :mime-charset 'iso-8859-13) + +(define-coding-system-alias 'iso-8859-13 'iso-latin-7) +(define-coding-system-alias 'latin-7 'iso-latin-7) + +(set-language-info-alist + "Latin-7" '((charset iso-8859-13) + (coding-system iso-latin-7) + (coding-priority iso-latin-7) + (nonascii-translation . iso-8859-13) + (unibyte-display . iso-latin-7) + ;; Fixme: input method. + (documentation . "Support for generic Latin-7 (Baltic Rim).")) + '("European")) + ;; Latin-8 (ISO-8859-14) -(make-coding-system - 'iso-latin-8 2 ?W ; `W' for `Welsh', since `C' - ; for `Celtic' is taken. - "ISO 2022 based 8-bit encoding for Latin-8 (MIME:ISO-8859-14)." - '(ascii latin-iso8859-14 nil nil - nil nil nil nil nil nil nil nil nil nil nil t t) - '((safe-charsets ascii latin-iso8859-14) - (mime-charset . iso-8859-14))) +(define-coding-system 'iso-latin-8 + "ISO 2022 based 8-bit encoding for Latin-8 (MIME:ISO-8859-14)." + :coding-type 'charset + ;; `W' for `Welsh', since `C' for `Celtic' is taken. + :mnemonic ?W + :charset-list '(iso-8859-14) + :mime-charset 'iso-8859-14) (define-coding-system-alias 'iso-8859-14 'iso-latin-8) (define-coding-system-alias 'latin-8 'iso-latin-8) (set-language-info-alist - "Latin-8" '((charset ascii latin-iso8859-14) + "Latin-8" '((charset iso-8859-14) (coding-system iso-latin-8) (coding-priority iso-latin-8) - (nonascii-translation . latin-iso8859-14) - (unibyte-syntax . "latin-8") + (nonascii-translation . iso-8859-14) (unibyte-display . iso-latin-8) (input-method . 
"latin-8-prefix") ;; Fixme: Welsh/Ga{e}lic greetings @@ -212,24 +252,23 @@ covered by other ISO-8859 character sets: ;; Latin-9 (ISO-8859-15) -(make-coding-system - 'iso-latin-9 2 ?0 ; `0' for `Latin-0' - "ISO 2022 based 8-bit encoding for Latin-9 (MIME:ISO-8859-15)." - '(ascii latin-iso8859-15 nil nil - nil nil nil nil nil nil nil nil nil nil nil t t) - '((safe-charsets ascii latin-iso8859-15) - (mime-charset . iso-8859-15))) +(define-coding-system 'iso-latin-9 + "ISO 2022 based 8-bit encoding for Latin-9 (MIME:ISO-8859-15)." + :coding-type 'charset + ;; `0' for `Latin-0' + :mnemonic ?0 + :charset-list '(iso-8859-15) + :mime-charset 'iso-8859-15) (define-coding-system-alias 'iso-8859-15 'iso-latin-9) (define-coding-system-alias 'latin-9 'iso-latin-9) (define-coding-system-alias 'latin-0 'iso-latin-9) (set-language-info-alist - "Latin-9" '((charset ascii latin-iso8859-15) + "Latin-9" '((charset iso-8859-15) (coding-system iso-latin-9) (coding-priority iso-latin-9) - (nonascii-translation . latin-iso8859-15) - (unibyte-syntax . "latin-9") + (nonascii-translation . iso-8859-15) (unibyte-display . iso-latin-9) (input-method . "latin-9-prefix") (sample-text @@ -241,13 +280,115 @@ addition of the Euro sign and some additional French and Finnish letters. 
Latin-9 is sometimes nicknamed `Latin-0'.")) '("European")) +(define-coding-system 'windows-1250 + "windows-1250 (Central European) encoding (MIME: WINDOWS-1250)" + :coding-type 'charset + :mnemonic ?* + :charset-list '(windows-1250) + :mime-charset 'windows-1250) +(define-coding-system-alias 'cp1250 'windows-1250) + +(define-coding-system 'windows-1252 + "windows-1252 (Western European) encoding (MIME: WINDOWS-1252)" + :coding-type 'charset + :mnemonic ?* + :charset-list '(windows-1252) + :mime-charset 'windows-1252) +(define-coding-system-alias 'cp1252 'windows-1252) + +(define-coding-system 'windows-1254 + "windows-1254 (Turkish) encoding (MIME: WINDOWS-1254)" + :coding-type 'charset + :mnemonic ?* + :charset-list '(windows-1254) + :mime-charset 'windows-1254) +(define-coding-system-alias 'cp1254 'windows-1254) + +(define-coding-system 'windows-1257 + "windows-1257 (Baltic) encoding (MIME: WINDOWS-1257)" + :coding-type 'charset + :mnemonic ?* + :charset-list '(windows-1257) + :mime-charset 'windows-1257) +(define-coding-system-alias 'cp1257 'windows-1257) + +(define-coding-system 'cp850 + "DOS codepage 850 (Western European)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp850) + :mime-charset 'cp850) +(define-coding-system-alias 'ibm850 'cp850) + +(define-coding-system 'cp852 + "DOS codepage 852 (Slavic)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp852) + :mime-charset 'cp852) +(define-coding-system-alias 'ibm852 'cp852) + +(define-coding-system 'cp857 + "DOS codepage 857 (Turkish)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp857) + :mime-charset 'cp857) +(define-coding-system-alias 'ibm857 'cp857) + +(define-coding-system 'cp858 + "Codepage 858 (Multilingual Latin I + Euro)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp858) + :mime-charset 'cp858) + +(define-coding-system 'cp860 + "DOS codepage 860 (Portuguese)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp860) + :mime-charset 'cp860) 
+(define-coding-system-alias 'ibm860 'cp860) + +(define-coding-system 'cp861 + "DOS codepage 861 (Icelandic)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp861) + :mime-charset 'cp861) +(define-coding-system-alias 'ibm861 'cp861) + +(define-coding-system 'cp863 + "DOS codepage 863 (French Canadian)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp863) + :mime-charset 'cp863) +(define-coding-system-alias 'ibm863 'cp863) + +(define-coding-system 'cp865 + "DOS codepage 865 (Norwegian/Danish)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp865) + :mime-charset 'cp865) +(define-coding-system-alias 'ibm865 'cp865) + +(define-coding-system 'cp437 + "DOS codepage 437" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp437) + :mime-charset 'cp437) +(define-coding-system-alias 'ibm437 'cp437) + (set-language-info-alist "Dutch" '((tutorial . "TUTORIAL.nl") - (charset ascii latin-iso8859-1) + (charset iso-8859-1) (coding-system iso-latin-1 iso-latin-9) (coding-priority iso-latin-1) - (nonascii-translation . latin-iso8859-1) - (unibyte-syntax . "latin-1") + (nonascii-translation . iso-8859-1) (unibyte-display . iso-latin-1) (input-method . "dutch") (sample-text . "Er is een aantal manieren waarop je dit kan doen") @@ -258,12 +399,11 @@ but it selects the Dutch tutorial and input method.")) (set-language-info-alist "German" '((tutorial . "TUTORIAL.de") - (charset ascii latin-iso8859-1) + (charset iso-8859-1) (coding-system iso-latin-1 iso-latin-9) (coding-priority iso-latin-1) + (nonascii-translation . iso-8859-1) (input-method . "german-postfix") - (nonascii-translation . latin-iso8859-1) - (unibyte-syntax . "latin-1") (unibyte-display . iso-latin-1) (sample-text . "\ German (Deutsch Nord) Guten Tag @@ -276,11 +416,10 @@ Additionally, it selects the German tutorial.")) (set-language-info-alist "French" '((tutorial . 
"TUTORIAL.fr") - (charset ascii latin-iso8859-1) + (charset iso-8859-1) (coding-system iso-latin-1 iso-latin-9) (coding-priority iso-latin-1) - (nonascii-translation . latin-iso8859-1) - (unibyte-syntax . "latin-1") + (nonascii-translation . iso-8859-1) (unibyte-display . iso-latin-1) (input-method . "latin-1-prefix") (sample-text . "French (Fran,Ag(Bais) Bonjour, Salut") @@ -291,11 +430,10 @@ but it selects the French tutorial and input method.")) (set-language-info-alist "Italian" '((tutorial . "TUTORIAL.it") - (charset ascii latin-iso8859-1) + (charset iso-8859-1) (coding-system iso-latin-1 iso-latin-9) (coding-priority iso-latin-1) - (nonascii-translation . latin-iso8859-1) - (unibyte-syntax . "latin-1") + (nonascii-translation . iso-8859-1) (unibyte-display . iso-latin-1) (input-method . "italian-postfix") (sample-text . "Salve, ciao!") @@ -306,12 +444,11 @@ Additionally, it selects the Italian tutorial.")) '("European")) (set-language-info-alist - "Slovenian" '((charset . (ascii latin-iso8859-2)) - (coding-system . (iso-8859-2)) + "Slovenian" '((charset iso-8859-2) + (coding-system . (iso-8859-2 windows-1250)) (coding-priority . (iso-8859-2)) - (nonascii-translation . latin-iso8859-2) + (nonascii-translation . iso-8859-2) (input-method . "slovenian") - (unibyte-syntax . "latin-2") (unibyte-display . iso-8859-2) (tutorial . "TUTORIAL.sl") (sample-text . ",B.(Belimo vam uspe,B9(Ben dan!") @@ -322,12 +459,11 @@ but it selects the Slovenian tutorial and input method.")) (set-language-info-alist "Spanish" '((tutorial . "TUTORIAL.es") - (charset ascii latin-iso8859-1) + (charset iso-8859-1) (coding-system iso-latin-1 iso-latin-9) (coding-priority iso-latin-1) (input-method . "spanish-postfix") - (nonascii-translation . latin-iso8859-1) - (unibyte-syntax . "latin-1") + (nonascii-translation . iso-8859-1) (unibyte-display . iso-latin-1) (sample-text . "Spanish (Espa,Aq(Bol) ,A!(BHola!") (documentation . 
"\ @@ -342,27 +478,36 @@ and it selects the Spanish tutorial.")) ;; "Latin-3" language environment. (set-language-info-alist - "Turkish" '((charset ascii latin-iso8859-9) - (coding-system iso-latin-5 iso-latin-3) + "Turkish" '((charset iso-8859-9) + (coding-system iso-latin-5 windows-1254 iso-latin-3) (coding-priority iso-latin-5) - (nonascii-translation . latin-iso8859-9) - (unibyte-syntax . "latin-5") + (nonascii-translation . iso-8859-9) (unibyte-display . iso-latin-5) (input-method . "turkish-postfix") (sample-text . "Turkish (T,M|(Brk,Mg(Be) Merhaba") - (documentation . t))) + (setup-function + . (lambda () + (set-case-syntax-pair ?I ?,C9(B (standard-case-table)) + (set-case-syntax-pair ?,C)(B ?i (standard-case-table)))) + (exit-function + . (lambda () + (set-case-syntax-pair ?I ?i (standard-case-table)) + (set-case-syntax ?,C9(B "w" (standard-case-table)) + (set-case-syntax ?,C)(B "w" (standard-case-table)))) + (documentation . "Support for Turkish. +Differs from the Latin-5 environment in using the `turkish-postfix' input +method and applying Turkish case rules for the characters i, I, ,C9(B, ,C)(B."))) ;; Polish ISO 8859-2 environment. ;; Maintainer: Wlodek Bzyl <matwb@univ.gda.pl> ;; Keywords: multilingual, Polish (set-language-info-alist - "Polish" '((charset . (ascii latin-iso8859-2)) - (coding-system . (iso-8859-2)) - (coding-priority . (iso-8859-2)) + "Polish" '((charset iso-8859-2) + (coding-system iso-8859-2 windows-1250) + (coding-priority iso-8859-2) (input-method . "polish-slash") - (nonascii-translation . latin-iso8859-2) - (unibyte-syntax . "latin-2") + (nonascii-translation . iso-8859-2) (unibyte-display . iso-8859-2) (tutorial . "TUTORIAL.pl") (sample-text . "P,Bs(Bjd,B<(B, ki,Bq(B-,B?(Be t,Bj(B chmurno,B6f(B w g,B31(Bb flaszy") @@ -372,7 +517,7 @@ and it selects the Spanish tutorial.")) (set-language-info-alist "Welsh" `((coding-system utf-8 latin-8) ; the input method is Unicode-based (coding-priority utf-8 latin-8) - (nonascii-translation . 
latin-iso8859-14) + (nonascii-translation . iso-8859-14) (input-method . "welsh") (documentation . "Support for Welsh, using Unicode.")) '("European")) @@ -389,244 +534,118 @@ and it selects the Spanish tutorial.")) (set-language-info-alist "Latin-7" `((coding-system latin-7) (coding-priority latin-7) - (nonascii-translation . ,(get 'decode-iso-latin-7 - 'translation-table)) + (nonascii-translation . iso-8859-13) (input-method . "latin-prefix") - (features code-pages) (documentation . "Support for Latin-7, e.g. Latvian, Lithuanian.")) '("European")) (set-language-info-alist - "Lithuanian" `((coding-system latin-7) + "Lithuanian" `((coding-system latin-7 windows-1257) (coding-priority latin-7) + (nonascii-translation . iso-8859-13) (input-method . "lithuanian-keyboard") - (nonascii-translation . ,(get 'decode-iso-latin-7 - 'translation-table)) - (features code-pages) (documentation . "Support for Lithuanian.")) '("European")) (set-language-info-alist - "Latvian" `((coding-system latin-7) + "Latvian" `((coding-system latin-7 windows-1257) (coding-priority latin-7) + (nonascii-translation . iso-8859-13) (input-method . "latvian-keyboard") - (nonascii-translation . ,(get 'decode-iso-latin-7 - 'translation-table)) - (features code-pages) (documentation . "Support for Latvian.")) '("European")) (set-language-info-alist "Swedish" '((tutorial . "TUTORIAL.sv") - (charset ascii latin-iso8859-1) + (charset iso-8859-1) (coding-system iso-latin-1) (coding-priority iso-latin-1) - (nonascii-translation . latin-iso8859-1) - (unibyte-syntax . "latin-1") + (nonascii-translation . iso-8859-1) (unibyte-display . iso-latin-1) (sample-text . "Goddag Hej") (documentation . "Support for Swedish")) '("European")) (set-language-info-alist - "Croatian" '((charset . (ascii latin-iso8859-2)) - (coding-system . (iso-8859-2)) - (coding-priority . (iso-8859-2)) + "Croatian" '((charset iso-8859-2) + (coding-system iso-8859-2) + (coding-priority iso-8859-2) (input-method . 
"croatian") - (nonascii-translation . latin-iso8859-2) - (unibyte-syntax . "latin-2") + (nonascii-translation . iso-8859-2) (unibyte-display . iso-8859-2) (documentation . "Support for Croatian with Latin-2 encoding.")) '("European")) + + +(define-coding-system 'mac-roman + "Mac Roman Encoding (MIME:MACINTOSH)." + :coding-type 'charset + :mnemonic ?M + :charset-list '(mac-roman) + :mime-charset 'macintosh) + +(define-coding-system 'next + "NeXTstep encoding" + :coding-type 'charset + :mnemonic ?* + :charset-list '(next) + :mime-charset 'next) + +(define-coding-system 'hp-roman8 + "Hewlet-Packard roman-8 encoding (MIME:ROMAN-8)" + :coding-type 'charset + :mnemonic ?* + :charset-list '(hp-roman8) + :mime-charset 'hp-roman8) +(define-coding-system-alias 'roman8 'hp-roman8) + +(define-coding-system 'adobe-standard-encoding + "Adobe `standard' encoding for PostScript" + :coding-type 'charset + :mnemonic ?* + :charset-list '(adobe-standard-encoding) + :mime-charset 'adobe-standard-encoding) + -;; Definitions for the Mac Roman character sets and coding system. -;; The Mac Roman encoding uses all 128 code points in the range 128 to -;; 255 for actual characters. Emacs decodes them to one of the -;; following character sets. 
-;; ascii, latin-iso8859-1, mule-unicode-0100-24ff, -;; mule-unicode-2500-33ff, mule-unicode-e000-ffff - -(let - ((encoding-vector (make-vector 256 nil)) - (i 0) - (vec ;; mac-roman (128..255) -> UCS mapping - [ #x00C4 ;; 128:LATIN CAPITAL LETTER A WITH DIAERESIS - #x00C5 ;; 129:LATIN CAPITAL LETTER A WITH RING ABOVE - #x00C7 ;; 130:LATIN CAPITAL LETTER C WITH CEDILLA - #x00C9 ;; 131:LATIN CAPITAL LETTER E WITH ACUTE - #x00D1 ;; 132:LATIN CAPITAL LETTER N WITH TILDE - #x00D6 ;; 133:LATIN CAPITAL LETTER O WITH DIAERESIS - #x00DC ;; 134:LATIN CAPITAL LETTER U WITH DIAERESIS - #x00E1 ;; 135:LATIN SMALL LETTER A WITH ACUTE - #x00E0 ;; 136:LATIN SMALL LETTER A WITH GRAVE - #x00E2 ;; 137:LATIN SMALL LETTER A WITH CIRCUMFLEX - #x00E4 ;; 138:LATIN SMALL LETTER A WITH DIAERESIS - #x00E3 ;; 139:LATIN SMALL LETTER A WITH TILDE - #x00E5 ;; 140:LATIN SMALL LETTER A WITH RING ABOVE - #x00E7 ;; 141:LATIN SMALL LETTER C WITH CEDILLA - #x00E9 ;; 142:LATIN SMALL LETTER E WITH ACUTE - #x00E8 ;; 143:LATIN SMALL LETTER E WITH GRAVE - #x00EA ;; 144:LATIN SMALL LETTER E WITH CIRCUMFLEX - #x00EB ;; 145:LATIN SMALL LETTER E WITH DIAERESIS - #x00ED ;; 146:LATIN SMALL LETTER I WITH ACUTE - #x00EC ;; 147:LATIN SMALL LETTER I WITH GRAVE - #x00EE ;; 148:LATIN SMALL LETTER I WITH CIRCUMFLEX - #x00EF ;; 149:LATIN SMALL LETTER I WITH DIAERESIS - #x00F1 ;; 150:LATIN SMALL LETTER N WITH TILDE - #x00F3 ;; 151:LATIN SMALL LETTER O WITH ACUTE - #x00F2 ;; 152:LATIN SMALL LETTER O WITH GRAVE - #x00F4 ;; 153:LATIN SMALL LETTER O WITH CIRCUMFLEX - #x00F6 ;; 154:LATIN SMALL LETTER O WITH DIAERESIS - #x00F5 ;; 155:LATIN SMALL LETTER O WITH TILDE - #x00FA ;; 156:LATIN SMALL LETTER U WITH ACUTE - #x00F9 ;; 157:LATIN SMALL LETTER U WITH GRAVE - #x00FB ;; 158:LATIN SMALL LETTER U WITH CIRCUMFLEX - #x00FC ;; 159:LATIN SMALL LETTER U WITH DIAERESIS - #x2020 ;; 160:DAGGER - #x00B0 ;; 161:DEGREE SIGN - #x00A2 ;; 162:CENT SIGN - #x00A3 ;; 163:POUND SIGN - #x00A7 ;; 164:SECTION SIGN - #x2022 ;; 165:BULLET - #x00B6 ;; 
166:PILCROW SIGN - #x00DF ;; 167:LATIN SMALL LETTER SHARP S - #x00AE ;; 168:REGISTERED SIGN - #x00A9 ;; 169:COPYRIGHT SIGN - #x2122 ;; 170:TRADE MARK SIGN - #x00B4 ;; 171:ACUTE ACCENT - #x00A8 ;; 172:DIAERESIS - #x2260 ;; 173:NOT EQUAL TO - #x00C6 ;; 174:LATIN CAPITAL LETTER AE - #x00D8 ;; 175:LATIN CAPITAL LETTER O WITH STROKE - #x221E ;; 176:INFINITY - #x00B1 ;; 177:PLUS-MINUS SIGN - #x2264 ;; 178:LESS-THAN OR EQUAL TO - #x2265 ;; 179:GREATER-THAN OR EQUAL TO - #x00A5 ;; 180:YEN SIGN - #x00B5 ;; 181:MICRO SIGN - #x2202 ;; 182:PARTIAL DIFFERENTIAL - #x2211 ;; 183:N-ARY SUMMATION - #x220F ;; 184:N-ARY PRODUCT - #x03C0 ;; 185:GREEK SMALL LETTER PI - #x222B ;; 186:INTEGRAL - #x00AA ;; 187:FEMININE ORDINAL INDICATOR - #x00BA ;; 188:MASCULINE ORDINAL INDICATOR - #x03A9 ;; 189:GREEK CAPITAL LETTER OMEGA - #x00E6 ;; 190:LATIN SMALL LETTER AE - #x00F8 ;; 191:LATIN SMALL LETTER O WITH STROKE - #x00BF ;; 192:INVERTED QUESTION MARK - #x00A1 ;; 193:INVERTED EXCLAMATION MARK - #x00AC ;; 194:NOT SIGN - #x221A ;; 195:SQUARE ROOT - #x0192 ;; 196:LATIN SMALL LETTER F WITH HOOK - #x2248 ;; 197:ALMOST EQUAL TO - #x2206 ;; 198:INCREMENT - #x00AB ;; 199:LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - #x00BB ;; 200:RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - #x2026 ;; 201:HORIZONTAL ELLIPSIS - #x00A0 ;; 202:NO-BREAK SPACE - #x00C0 ;; 203:LATIN CAPITAL LETTER A WITH GRAVE - #x00C3 ;; 204:LATIN CAPITAL LETTER A WITH TILDE - #x00D5 ;; 205:LATIN CAPITAL LETTER O WITH TILDE - #x0152 ;; 206:LATIN CAPITAL LIGATURE OE - #x0153 ;; 207:LATIN SMALL LIGATURE OE - #x2013 ;; 208:EN DASH - #x2014 ;; 209:EM DASH - #x201C ;; 210:LEFT DOUBLE QUOTATION MARK - #x201D ;; 211:RIGHT DOUBLE QUOTATION MARK - #x2018 ;; 212:LEFT SINGLE QUOTATION MARK - #x2019 ;; 213:RIGHT SINGLE QUOTATION MARK - #x00F7 ;; 214:DIVISION SIGN - #x25CA ;; 215:LOZENGE - #x00FF ;; 216:LATIN SMALL LETTER Y WITH DIAERESIS - #x0178 ;; 217:LATIN CAPITAL LETTER Y WITH DIAERESIS - #x2044 ;; 218:FRACTION SLASH - #x20AC ;; 219:EURO SIGN - #x2039 
;; 220:SINGLE LEFT-POINTING ANGLE QUOTATION MARK - #x203A ;; 221:SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - #xFB01 ;; 222:LATIN SMALL LIGATURE FI - #xFB02 ;; 223:LATIN SMALL LIGATURE FL - #x2021 ;; 224:DOUBLE DAGGER - #x00B7 ;; 225:MIDDLE DOT - #x201A ;; 226:SINGLE LOW-9 QUOTATION MARK - #x201E ;; 227:DOUBLE LOW-9 QUOTATION MARK - #x2030 ;; 228:PER MILLE SIGN - #x00C2 ;; 229:LATIN CAPITAL LETTER A WITH CIRCUMFLEX - #x00CA ;; 230:LATIN CAPITAL LETTER E WITH CIRCUMFLEX - #x00C1 ;; 231:LATIN CAPITAL LETTER A WITH ACUTE - #x00CB ;; 232:LATIN CAPITAL LETTER E WITH DIAERESIS - #x00C8 ;; 233:LATIN CAPITAL LETTER E WITH GRAVE - #x00CD ;; 234:LATIN CAPITAL LETTER I WITH ACUTE - #x00CE ;; 235:LATIN CAPITAL LETTER I WITH CIRCUMFLEX - #x00CF ;; 236:LATIN CAPITAL LETTER I WITH DIAERESIS - #x00CC ;; 237:LATIN CAPITAL LETTER I WITH GRAVE - #x00D3 ;; 238:LATIN CAPITAL LETTER O WITH ACUTE - #x00D4 ;; 239:LATIN CAPITAL LETTER O WITH CIRCUMFLEX - #xF8FF ;; 240:Apple logo - #x00D2 ;; 241:LATIN CAPITAL LETTER O WITH GRAVE - #x00DA ;; 242:LATIN CAPITAL LETTER U WITH ACUTE - #x00DB ;; 243:LATIN CAPITAL LETTER U WITH CIRCUMFLEX - #x00D9 ;; 244:LATIN CAPITAL LETTER U WITH GRAVE - #x0131 ;; 245:LATIN SMALL LETTER DOTLESS I - #x02C6 ;; 246:MODIFIER LETTER CIRCUMFLEX ACCENT - #x02DC ;; 247:SMALL TILDE - #x00AF ;; 248:MACRON - #x02D8 ;; 249:BREVE - #x02D9 ;; 250:DOT ABOVE - #x02DA ;; 251:RING ABOVE - #x00B8 ;; 252:CEDILLA - #x02DD ;; 253:DOUBLE ACUTE ACCENT - #x02DB ;; 254:OGONEK - #x02C7 ;; 255:CARON - ]) - translation-table) - (while (< i 128) - (aset encoding-vector i i) - (setq i (1+ i))) - (while (< i 256) - (aset encoding-vector i - (decode-char 'ucs (aref vec (- i 128)))) - (setq i (1+ i))) - (setq translation-table - (make-translation-table-from-vector encoding-vector)) - (define-translation-table 'mac-roman-decoder translation-table) - (define-translation-table 'mac-roman-encoder - (char-table-extra-slot translation-table 0))) - -(define-ccl-program decode-mac-roman - `(4 - ((loop - 
(read r1) - (if (r1 < 128) ;; ASCII - (r0 = ,(charset-id 'ascii)) - (if (r1 < 160) - (r0 = ,(charset-id 'eight-bit-control)) - (r0 = ,(charset-id 'eight-bit-graphic)))) - (translate-character mac-roman-decoder r0 r1) - (write-multibyte-character r0 r1) - (repeat)))) - "CCL program to decode Mac Roman") - -(define-ccl-program encode-mac-roman - `(1 - ((loop - (read-multibyte-character r0 r1) - (translate-character ucs-mule-to-mule-unicode r0 r1) - (translate-character mac-roman-encoder r0 r1) - (if (r0 != ,(charset-id 'ascii)) - (if (r0 != ,(charset-id 'eight-bit-graphic)) - (if (r0 != ,(charset-id 'eight-bit-control)) - (r1 = ??)))) - (write-repeat r1)))) - "CCL program to encode Mac Roman") - -(make-coding-system - 'mac-roman 4 ?M - "Mac Roman Encoding (MIME:MACINTOSH)." - '(decode-mac-roman . encode-mac-roman) - (list (cons 'safe-chars (get 'mac-roman-encoder 'translation-table)) - '(valid-codes (0 . 255)) - '(mime-charset . macintosh))) ; per IANA, rfc1345 +;; For automatic composing of diacritics and combining marks. +(dolist (range '( ;; combining diacritical marks + (#x0300 #x0314 (tc . bc)) + (#x0315 (tr . bl)) + (#x0316 #x0319 (bc . tc)) + (#x031A (tr . cl)) + (#x031B #x0320 (bc . tc)) + (#x0321 (Br . tr)) + (#x0322 (Br . tl)) + (#x0323 #x0333 (bc . tc)) + (#x0334 #x0338 (Bc . Bc)) + (#x0339 #x033C (bc . tc)) + (#x033D #x033F (tc . bc)) + (#x0340 (tl . bc)) + (#x0341 (tr . bc)) + (#x0342 #x0344 (tc . bc)) + (#x0345 (bc . tc)) + (#x0346 (tc . bc)) + (#x0347 #x0349 (bc . tc)) + (#x034A #x034C (tc . bc)) + (#x034D #x034E (bc . tc)) + ;; combining diacritical marks for symbols + (#x20D0 #x20D1 (tc . bc)) + (#x20D2 #x20D3 (Bc . Bc)) + (#x20D4 #x20D7 (tc . bc)) + (#x20D8 #x20DA (Bc . Bc)) + (#x20DB #x20DC (tc . bc)) + (#x20DD #x20E0 (Bc . Bc)) + (#x20E1 (tc . bc)) + (#x20E2 #x20E3 (Bc . 
Bc)))) + (let* ((from (car range)) + (to (if (= (length range) 3) + (nth 1 range) + from)) + (composition (car (last range)))) + (while (<= from to) + (put-char-code-property from 'diacritic-composition composition) + (aset composition-function-table from 'diacritic-composition-function) + (setq from (1+ from))))) (defconst diacritic-composition-pattern "\\C^\\c^+") @@ -658,29 +677,52 @@ positions (integers or markers) specifying the region." (diacritic-compose-region (point) (+ (point) len)) len) -(defun diacritic-composition-function (from to pattern &optional string) - "Compose diacritic text in the region FROM and TO. -The text matches the regular expression PATTERN. -Optional 4th argument STRING, if non-nil, is a string containing text +(defun diacritic-composition-function (pos &optional string) + "Compose diacritic text around POS. +Optional 2nd argument STRING, if non-nil, is a string containing text to compose. -The return value is the number of composed characters." - (when (< (1+ from) to) - (if string - (compose-string string from to) - (compose-region from to)) - (- to from))) - -;; Register a function to compose Unicode diacrtics and marks. -(let ((patterns '(("\\C^\\c^+" . diacritic-composition-function)))) - (let ((c #x300)) - (while (<= c #x362) - (aset composition-function-table (decode-char 'ucs c) patterns) - (setq c (1+ c))) - (setq c #x20d0) - (while (<= c #x20e3) - (aset composition-function-table (decode-char 'ucs c) patterns) - (setq c (1+ c))))) +The return value is the end position of composed characters, +or nil if no characters are composed." + (setq pos (1- pos)) + (if string + (let ((ch (aref string pos)) + start end components ch composition) + (when (and (>= pos 0) + ;; Previous character is latin. 
+ (aref (char-category-set ch) ?l) + (/= ch 32)) + (setq start pos + end (length string) + components (list ch) + pos (1+ pos)) + (while (and + (< pos end) + (setq ch (aref string pos) + composition + (get-char-code-property ch 'diacritic-composition))) + (setq components (cons ch (cons composition components)) + pos (1+ pos))) + (compose-string string start pos (nreverse components)) + pos)) + (let ((ch (char-after pos)) + start end components composition) + (when (and (>= pos (point-min)) + (aref (char-category-set ch) ?l) + (/= ch 32)) + (setq start pos + end (point-max) + components (list ch) + pos (1+ pos)) + (while (and + (< pos end) + (setq ch (char-after pos) + composition + (get-char-code-property ch 'diacritic-composition))) + (setq components (cons ch (cons composition components)) + pos (1+ pos))) + (compose-region start pos (nreverse components)) + pos)))) (provide 'european) diff --git a/lisp/language/georgian.el b/lisp/language/georgian.el index 027c361c00b..f38529d20aa 100644 --- a/lisp/language/georgian.el +++ b/lisp/language/georgian.el @@ -1,6 +1,6 @@ ;;; georgian.el --- language support for Georgian -*- no-byte-compile: t -*- -;; Copyright (C) 2001 Free Software Foundation, Inc. +;; Copyright (C) 2001, 2003 Free Software Foundation, Inc. ;; Author: Dave Love <fx@gnu.org> ;; Keywords: i18n @@ -26,13 +26,23 @@ ;;; Code: +(define-coding-system 'georgian-ps + "Georgian PS encoding" + :coding-type 'charset + :mnemonic ?G + :charset-list '(georgian-ps)) + +(define-coding-system 'georgian-academy + "Georgian Academy encoding" + :coding-type 'charset + :mnemonic ?G + :charset-list '(georgian-academy)) + (set-language-info-alist "Georgian" `((coding-system georgian-ps) (coding-priority georgian-ps) (input-method . "georgian") - (features code-pages) - (nonascii-translation . ,(get 'decode-georgian-ps - 'translation-table)) + (nonascii-translation . georgian-ps) (documentation . 
"Support for georgian-ps character set.")) '("European")) ; fixme: is this appropriate for ; a non-Latin script? diff --git a/lisp/language/greek.el b/lisp/language/greek.el index b8843960723..6061ed203c8 100644 --- a/lisp/language/greek.el +++ b/lisp/language/greek.el @@ -1,7 +1,11 @@ ;;; greek.el --- support for Greek -*- no-byte-compile: t -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. +;; Licensed to the Free Software Foundation. +;; Copyright (C) 2002 Free Software Foundation, Inc. +;; Copyright (C) 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 ;; Keywords: multilingual, Greek @@ -28,23 +32,52 @@ ;;; Code: -(make-coding-system - 'greek-iso-8bit 2 ?7 - "ISO 2022 based 8-bit encoding for Greek (MIME:ISO-8859-7)." - '(ascii greek-iso8859-7 nil nil - nil nil nil nil nil nil nil nil nil nil nil t) - '((safe-charsets ascii greek-iso8859-7) - (mime-charset . iso-8859-7))) +(define-coding-system 'greek-iso-8bit + "ISO 2022 based 8-bit encoding for Greek (MIME:ISO-8859-7)." 
+ :coding-type 'charset + :mnemonic ?7 + :charset-list '(iso-8859-7) + :mime-charset 'iso-8859-7) (define-coding-system-alias 'iso-8859-7 'greek-iso-8bit) +(define-coding-system 'windows-1253 + "windows-1253 encoding for Greek" + :coding-type 'charset + :mnemonic ?g + :charset-list '(windows-1253) + :mime-charset 'windows-1253) +(define-coding-system-alias 'cp1253 'windows-1253) + +(define-coding-system 'cp737 + "Codepage 737 (PC Greek)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp737) + :mime-charset 'cp737) + +(define-coding-system 'cp851 + "DOS codepage 851 (Greek)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp851) + :mime-charset 'cp851) +(define-coding-system-alias 'ibm851 'cp851) + +(define-coding-system 'cp869 + "DOS codepage 869 (Greek)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp869) + :mime-charset 'cp869) +(define-coding-system-alias 'ibm869 'cp869) + (set-language-info-alist - "Greek" '((charset . (greek-iso8859-7)) - (coding-system . (greek-iso-8bit)) + "Greek" '((charset iso-8859-7) + (coding-system greek-iso-8bit windows-1253 cp851 cp869) (coding-priority greek-iso-8bit) - (nonascii-translation . greek-iso8859-7) + (nonascii-translation . iso-8859-7) (input-method . "greek") - (unibyte-display . greek-iso-8bit) (documentation . t))) (provide 'greek) diff --git a/lisp/language/hebrew.el b/lisp/language/hebrew.el index 2bc79ff810d..871ec1b223e 100644 --- a/lisp/language/hebrew.el +++ b/lisp/language/hebrew.el @@ -1,8 +1,11 @@ ;;; hebrew.el --- support for Hebrew -*- coding: iso-2022-7bit; no-byte-compile: t -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. -;; Copyright (C) 2001 Free Software Foundation, Inc. +;; Licensed to the Free Software Foundation. +;; Copyright (C) 2001, 2002 Free Software Foundation, Inc. 
+;; Copyright (C) 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 ;; Keywords: multilingual, Hebrew @@ -31,13 +34,12 @@ ;;; Code: -(make-coding-system - 'hebrew-iso-8bit 2 ?8 - "ISO 2022 based 8-bit encoding for Hebrew (MIME:ISO-8859-8)." - '(ascii hebrew-iso8859-8 nil nil - nil nil nil nil nil nil nil nil t nil nil t) - '((safe-charsets ascii hebrew-iso8859-8) - (mime-charset . iso-8859-8))) +(define-coding-system 'hebrew-iso-8bit + "ISO 2022 based 8-bit encoding for Hebrew (MIME:ISO-8859-8)." + :coding-type 'charset + :mnemonic ?8 + :charset-list '(iso-8859-8) + :mime-charset 'iso-8859-8) (define-coding-system-alias 'iso-8859-8 'hebrew-iso-8bit) @@ -49,10 +51,10 @@ (define-coding-system-alias 'iso-8859-8-i 'hebrew-iso-8bit) (set-language-info-alist - "Hebrew" '((charset . (hebrew-iso8859-8)) + "Hebrew" '((charset iso-8859-8) (coding-priority hebrew-iso-8bit) - (coding-system . (hebrew-iso-8bit)) - (nonascii-translation . hebrew-iso8859-8) + (coding-system hebrew-iso-8bit windows-1255 cp862) + (nonascii-translation . iso-8859-8) (input-method . "hebrew") (unibyte-display . hebrew-iso-8bit) (sample-text . "Hebrew ,Hylem(B") @@ -61,11 +63,26 @@ (set-language-info-alist "Windows-1255" '((coding-priority windows-1255) (coding-system windows-1255) - (features code-pages) (documentation . "\ Support for Windows-1255 encoding, e.g. for Yiddish. 
Right-to-left writing is not yet supported."))) +(define-coding-system 'windows-1255 + "windows-1255 (Hebrew) encoding (MIME: WINDOWS-1255)" + :coding-type 'charset + :mnemonic ?h + :charset-list '(windows-1255) + :mime-charset 'windows-1255) +(define-coding-system-alias 'cp1255 'windows-1255) + +(define-coding-system 'cp862 + "DOS codepage 862 (Hebrew)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp862) + :mime-charset 'cp862) +(define-coding-system-alias 'ibm862 'cp862) + (provide 'hebrew) ;;; hebrew.el ends here diff --git a/lisp/language/ind-util.el b/lisp/language/ind-util.el index 60008cce48c..862ebf39e84 100644 --- a/lisp/language/ind-util.el +++ b/lisp/language/ind-util.el @@ -407,7 +407,7 @@ FUNCTION will be called 15 times." ;; trans-char -- nil / string / list of strings (when (and char trans-char) (if (stringp trans-char) (setq trans-char (list trans-char))) - (if (char-valid-p char) (setq char (char-to-string char))) + (if (characterp char) (setq char (char-to-string char))) (puthash char (car trans-char) encode-hash) (dolist (trans trans-char) (puthash trans char decode-hash))))) @@ -425,7 +425,7 @@ FUNCTION will be called 15 times." (defun indian--puthash-c (c trans-c halant hashtbls) (indian--map (lambda (c trans-c) - (if (char-valid-p c) (setq c (char-to-string c))) + (if (characterp c) (setq c (char-to-string c))) (indian--puthash-char (concat c halant) trans-c hashtbls)) c trans-c)) @@ -441,8 +441,8 @@ FUNCTION will be called 15 times." (indian--map (lambda (v trans-v) (when (and c trans-c v trans-v) - (if (char-valid-p c) (setq c (char-to-string c))) - (setq v (if (char-valid-p (cadr v)) (char-to-string (cadr v)) "")) + (if (characterp c) (setq c (char-to-string c))) + (setq v (if (characterp (cadr v)) (char-to-string (cadr v)) "")) (if (stringp trans-c) (setq trans-c (list trans-c))) (if (stringp trans-v) (setq trans-v (list trans-v))) (indian--puthash-char @@ -576,151 +576,164 @@ FUNCTION will be called 15 times." 
;; The followings provide conversion between IS 13194 (ISCII) and UCS. -(defvar ucs-devanagari-to-is13194-alist - '(;;Unicode vs IS13194 ;; only Devanagari is supported now. - (?\x0900 . "[U+0900]") - (?\x0901 . "(5!(B") - (?\x0902 . "(5"(B") - (?\x0903 . "(5#(B") - (?\x0904 . "[U+0904]") - (?\x0905 . "(5$(B") - (?\x0906 . "(5%(B") - (?\x0907 . "(5&(B") - (?\x0908 . "(5'(B") - (?\x0909 . "(5((B") - (?\x090a . "(5)(B") - (?\x090b . "(5*(B") - (?\x090c . "(5&i(B") - (?\x090d . "(5.(B") - (?\x090e . "(5+(B") - (?\x090f . "(5,(B") - (?\x0910 . "(5-(B") - (?\x0911 . "(52(B") - (?\x0912 . "(5/(B") - (?\x0913 . "(50(B") - (?\x0914 . "(51(B") - (?\x0915 . "(53(B") - (?\x0916 . "(54(B") - (?\x0917 . "(55(B") - (?\x0918 . "(56(B") - (?\x0919 . "(57(B") - (?\x091a . "(58(B") - (?\x091b . "(59(B") - (?\x091c . "(5:(B") - (?\x091d . "(5;(B") - (?\x091e . "(5<(B") - (?\x091f . "(5=(B") - (?\x0920 . "(5>(B") - (?\x0921 . "(5?(B") - (?\x0922 . "(5@(B") - (?\x0923 . "(5A(B") - (?\x0924 . "(5B(B") - (?\x0925 . "(5C(B") - (?\x0926 . "(5D(B") - (?\x0927 . "(5E(B") - (?\x0928 . "(5F(B") - (?\x0929 . "(5G(B") - (?\x092a . "(5H(B") - (?\x092b . "(5I(B") - (?\x092c . "(5J(B") - (?\x092d . "(5K(B") - (?\x092e . "(5L(B") - (?\x092f . "(5M(B") - (?\x0930 . "(5O(B") - (?\x0931 . "(5P(B") - (?\x0932 . "(5Q(B") - (?\x0933 . "(5R(B") - (?\x0934 . "(5S(B") - (?\x0935 . "(5T(B") - (?\x0936 . "(5U(B") - (?\x0937 . "(5V(B") - (?\x0938 . "(5W(B") - (?\x0939 . "(5X(B") - (?\x093a . "[U+093a]") - (?\x093b . "[U+093b]") - (?\x093c . "(5i(B") - (?\x093d . "(5ji(B") - (?\x093e . "(5Z(B") - (?\x093f . "(5[(B") - (?\x0940 . "(5\(B") - (?\x0941 . "(5](B") - (?\x0942 . "(5^(B") - (?\x0943 . "(5_(B") - (?\x0944 . "(5_i(B") - (?\x0945 . "(5c(B") - (?\x0946 . "(5`(B") - (?\x0947 . "(5a(B") - (?\x0948 . "(5b(B") - (?\x0949 . "(5g(B") - (?\x094a . "(5d(B") - (?\x094b . "(5e(B") - (?\x094c . "(5f(B") - (?\x094d . "(5h(B") - (?\x094e . "[U+094e]") - (?\x094f . "[U+094f]") - (?\x0950 . "(5!i(B") - (?\x0951 . 
"(5p5(B") - (?\x0952 . "(5p8(B") - (?\x0953 . "[DEVANAGARI GRAVE ACCENT]") - (?\x0954 . "[DEVANAGARI ACUTE ACCENT]") - (?\x0955 . "[U+0955]") - (?\x0956 . "[U+0956]") - (?\x0957 . "[U+0957]") - (?\x0958 . "(53i(B") - (?\x0959 . "(54i(B") - (?\x095a . "(55i(B") - (?\x095b . "(5:i(B") - (?\x095c . "(5?i(B") - (?\x095d . "(5@i(B") - (?\x095e . "(5Ii(B") - (?\x095f . "(5N(B") - (?\x0960 . "(5*i(B") - (?\x0961 . "(5'i(B") - (?\x0962 . "(5[i(B") - (?\x0963 . "(5ei(B") - (?\x0964 . "(5j(B") - (?\x0965 . "(5jj(B") - (?\x0966 . "(5q(B") - (?\x0967 . "(5r(B") - (?\x0968 . "(5s(B") - (?\x0969 . "(5t(B") - (?\x096a . "(5u(B") - (?\x096b . "(5v(B") - (?\x096c . "(5w(B") - (?\x096d . "(5x(B") - (?\x096e . "(5y(B") - (?\x096f . "(5z(B") - (?\x0970 . "[U+0970]") - (?\x0971 . "[U+0971]") - (?\x0972 . "[U+0972]") - (?\x0973 . "[U+0973]") - (?\x0974 . "[U+0974]") - (?\x0975 . "[U+0975]") - (?\x0976 . "[U+0976]") - (?\x0977 . "[U+0977]") - (?\x0978 . "[U+0978]") - (?\x0979 . "[U+0979]") - (?\x097a . "[U+097a]") - (?\x097b . "[U+097b]") - (?\x097c . "[U+097c]") - (?\x097d . "[U+097d]") - (?\x097e . "[U+097e]") - (?\x097f . "[U+097f]"))) - -(defvar ucs-bengali-to-is13194-alist nil) -(defvar ucs-assamese-to-is13194-alist nil) -(defvar ucs-gurmukhi-to-is13194-alist nil) -(defvar ucs-gujarati-to-is13194-alist nil) -(defvar ucs-oriya-to-is13194-alist nil) -(defvar ucs-tamil-to-is13194-alist nil) -(defvar ucs-telugu-to-is13194-alist nil) -(defvar ucs-malayalam-to-is13194-alist nil) - -(defvar is13194-default-repartory 'devanagari) +(let + ;;Unicode vs IS13194 ;; only Devanagari is supported now. + ((ucs-devanagari-to-is13194-alist + '((?\x0900 . "[U+0900]") + (?\x0901 . "(5!(B") + (?\x0902 . "(5"(B") + (?\x0903 . "(5#(B") + (?\x0904 . "[U+0904]") + (?\x0905 . "(5$(B") + (?\x0906 . "(5%(B") + (?\x0907 . "(5&(B") + (?\x0908 . "(5'(B") + (?\x0909 . "(5((B") + (?\x090a . "(5)(B") + (?\x090b . "(5*(B") + (?\x090c . "(5&i(B") + (?\x090d . "(5.(B") + (?\x090e . "(5+(B") + (?\x090f . 
"(5,(B") + (?\x0910 . "(5-(B") + (?\x0911 . "(52(B") + (?\x0912 . "(5/(B") + (?\x0913 . "(50(B") + (?\x0914 . "(51(B") + (?\x0915 . "(53(B") + (?\x0916 . "(54(B") + (?\x0917 . "(55(B") + (?\x0918 . "(56(B") + (?\x0919 . "(57(B") + (?\x091a . "(58(B") + (?\x091b . "(59(B") + (?\x091c . "(5:(B") + (?\x091d . "(5;(B") + (?\x091e . "(5<(B") + (?\x091f . "(5=(B") + (?\x0920 . "(5>(B") + (?\x0921 . "(5?(B") + (?\x0922 . "(5@(B") + (?\x0923 . "(5A(B") + (?\x0924 . "(5B(B") + (?\x0925 . "(5C(B") + (?\x0926 . "(5D(B") + (?\x0927 . "(5E(B") + (?\x0928 . "(5F(B") + (?\x0929 . "(5G(B") + (?\x092a . "(5H(B") + (?\x092b . "(5I(B") + (?\x092c . "(5J(B") + (?\x092d . "(5K(B") + (?\x092e . "(5L(B") + (?\x092f . "(5M(B") + (?\x0930 . "(5O(B") + (?\x0931 . "(5P(B") + (?\x0932 . "(5Q(B") + (?\x0933 . "(5R(B") + (?\x0934 . "(5S(B") + (?\x0935 . "(5T(B") + (?\x0936 . "(5U(B") + (?\x0937 . "(5V(B") + (?\x0938 . "(5W(B") + (?\x0939 . "(5X(B") + (?\x093a . "[U+093a]") + (?\x093b . "[U+093b]") + (?\x093c . "(5i(B") + (?\x093d . "(5ji(B") + (?\x093e . "(5Z(B") + (?\x093f . "(5[(B") + (?\x0940 . "(5\(B") + (?\x0941 . "(5](B") + (?\x0942 . "(5^(B") + (?\x0943 . "(5_(B") + (?\x0944 . "(5_i(B") + (?\x0945 . "(5c(B") + (?\x0946 . "(5`(B") + (?\x0947 . "(5a(B") + (?\x0948 . "(5b(B") + (?\x0949 . "(5g(B") + (?\x094a . "(5d(B") + (?\x094b . "(5e(B") + (?\x094c . "(5f(B") + (?\x094d . "(5h(B") + (?\x094e . "[U+094e]") + (?\x094f . "[U+094f]") + (?\x0950 . "(5!i(B") + (?\x0951 . "(5p5(B") + (?\x0952 . "(5p8(B") + (?\x0953 . "[DEVANAGARI GRAVE ACCENT]") + (?\x0954 . "[DEVANAGARI ACUTE ACCENT]") + (?\x0955 . "[U+0955]") + (?\x0956 . "[U+0956]") + (?\x0957 . "[U+0957]") + (?\x0958 . "(53i(B") + (?\x0959 . "(54i(B") + (?\x095a . "(55i(B") + (?\x095b . "(5:i(B") + (?\x095c . "(5?i(B") + (?\x095d . "(5@i(B") + (?\x095e . "(5Ii(B") + (?\x095f . "(5N(B") + (?\x0960 . "(5*i(B") + (?\x0961 . "(5'i(B") + (?\x0962 . "(5[i(B") + (?\x0963 . "(5ei(B") + (?\x0964 . "(5j(B") + (?\x0965 . "(5jj(B") + (?\x0966 . 
"(5q(B") + (?\x0967 . "(5r(B") + (?\x0968 . "(5s(B") + (?\x0969 . "(5t(B") + (?\x096a . "(5u(B") + (?\x096b . "(5v(B") + (?\x096c . "(5w(B") + (?\x096d . "(5x(B") + (?\x096e . "(5y(B") + (?\x096f . "(5z(B") + (?\x0970 . "[U+0970]") + (?\x0971 . "[U+0971]") + (?\x0972 . "[U+0972]") + (?\x0973 . "[U+0973]") + (?\x0974 . "[U+0974]") + (?\x0975 . "[U+0975]") + (?\x0976 . "[U+0976]") + (?\x0977 . "[U+0977]") + (?\x0978 . "[U+0978]") + (?\x0979 . "[U+0979]") + (?\x097a . "[U+097a]") + (?\x097b . "[U+097b]") + (?\x097c . "[U+097c]") + (?\x097d . "[U+097d]") + (?\x097e . "[U+097e]") + (?\x097f . "[U+097f]"))) + (ucs-bengali-to-is13194-alist nil) + (ucs-assamese-to-is13194-alist nil) + (ucs-gurmukhi-to-is13194-alist nil) + (ucs-gujarati-to-is13194-alist nil) + (ucs-oriya-to-is13194-alist nil) + (ucs-tamil-to-is13194-alist nil) + (ucs-telugu-to-is13194-alist nil) + (ucs-malayalam-to-is13194-alist nil)) + (dolist (script '(devanagari bengali assamese gurmukhi gujarati + oriya tamil telugu malayalam)) + (let ((hashtable (intern (concat "is13194-to-ucs-" + (symbol-name script) "-hashtbl" ))) + (regexp (intern (concat "is13194-to-ucs-" + (symbol-name script) "-regexp")))) + (set hashtable (make-hash-table :test 'equal :size 128)) + (dolist (x (eval (intern (concat "ucs-" (symbol-name script) + "-to-is13194-alist")))) + (put-char-code-property (car x) 'script script) + (put-char-code-property (car x) 'iscii (cdr x)) + (puthash (cdr x) (char-to-string (car x)) (eval hashtable))) + (set regexp (indian-regexp-of-hashtbl-keys (eval hashtable)))))) + +(defvar is13194-default-repertory 'devanagari) (defvar is13194-repertory-to-ucs-script - `((DEF ?\x40 ,is13194-default-repartory) - (RMN ?\x41 ,is13194-default-repartory) + `((DEF ?\x40 ,is13194-default-repertory) + (RMN ?\x41 ,is13194-default-repertory) (DEV ?\x42 devanagari) (BNG ?\x43 bengali) (TML ?\x44 tamil) @@ -752,31 +765,10 @@ FUNCTION will be called 15 times." 
(defvar is13194-to-ucs-malayalam-hashtbl nil) (defvar is13194-to-ucs-malayalam-regexp nil) -(mapc - (function (lambda (script) - (let ((hashtable (intern (concat "is13194-to-ucs-" - (symbol-name script) "-hashtbl" ))) - (regexp (intern (concat "is13194-to-ucs-" - (symbol-name script) "-regexp")))) - (set hashtable (make-hash-table :test 'equal :size 128)) - (mapc - (function (lambda (x) - (put-char-code-property (decode-char 'ucs (car x)) - 'script script) - (put-char-code-property (decode-char 'ucs (car x)) - 'iscii (cdr x)) - (puthash (cdr x) (char-to-string (decode-char 'ucs (car x))) - (eval hashtable)))) - (eval (intern (concat "ucs-" (symbol-name script) - "-to-is13194-alist")))) - (set regexp (indian-regexp-of-hashtbl-keys (eval hashtable)))))) - '(devanagari bengali assamese gurmukhi gujarati - oriya tamil telugu malayalam)) - (defvar ucs-to-is13194-regexp ;; only Devanagari is supported now. - (concat "[" (char-to-string (decode-char 'ucs #x0900)) - "-" (char-to-string (decode-char 'ucs #x097f)) "]") + (concat "[" (char-to-string #x0900) + "-" (char-to-string #x097f) "]") "Regexp that matches to conversion") (defun ucs-to-iscii-region (from to) @@ -788,11 +780,11 @@ Returns new end position." (save-restriction (narrow-to-region from to) (goto-char (point-min)) - (let* ((current-repertory is13194-default-repartory)) - (while (re-search-forward ucs-to-is13194-regexp nil t) - (replace-match - (get-char-code-property (string-to-char (match-string 0)) - 'iscii)))) + (let* ((current-repertory is13194-default-repertory)) + (while (re-search-forward ucs-to-is13194-regexp nil t) + (replace-match + (get-char-code-property (string-to-char (match-string 0)) + 'iscii)))) (point-max)))) (defun iscii-to-ucs-region (from to) @@ -804,29 +796,34 @@ Returns new end position." 
(save-restriction (narrow-to-region from to) (goto-char (point-min)) - (let* ((current-repertory is13194-default-repartory) - (current-hashtable - (intern (concat "is13194-to-ucs-" - (symbol-name current-repertory) "-hashtbl"))) - (current-regexp - (intern (concat "is13194-to-ucs-" - (symbol-name current-repertory) "-regexp")))) - (while (re-search-forward (eval current-regexp) nil t) - (replace-match - (gethash (match-string 0) (eval current-hashtable) "")))) + (let* ((current-repertory is13194-default-repertory) + (current-hashtable + (intern (concat "is13194-to-ucs-" + (symbol-name current-repertory) "-hashtbl"))) + (current-regexp + (intern (concat "is13194-to-ucs-" + (symbol-name current-repertory) "-regexp"))) + (re (eval current-regexp)) + (hash (eval current-hashtable))) + (while (re-search-forward re nil t) + (replace-match (gethash (match-string 0) hash "")))) (point-max)))) ;;;###autoload (defun indian-compose-region (from to) - "Compose the region according to `composition-function-table'. " + "Compose the region according to `composition-function-table'." (interactive "r") (save-excursion (save-restriction - (let ((pos from) chars (max to)) - (narrow-to-region from to) - (while (< pos max) - (setq chars (compose-chars-after pos)) - (if chars (setq pos (+ pos chars)) (setq pos (1+ pos)))))))) + (let ((pos from) newpos func (max to)) + (narrow-to-region from to) + (while (< pos max) + (setq func (aref composition-function-table (char-after pos))) + (if (fboundp func) + (setq newpos (funcall func pos nil) + pos (if (and (integerp newpos) (> newpos pos)) + newpos (1+ pos))) + (setq pos (1+ pos)))))))) ;;;###autoload (defun indian-compose-string (string) @@ -857,405 +854,370 @@ Returns new end position." ;;; Backward Compatibility support programs -;; The followings provides the conversion from old-implementation of +;; The following provides the conversion from old-implementation of ;; Emacs Devanagari script to UCS. 
(defconst indian-2-colum-to-ucs '( ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2120 $(5!!!"!#!$!%!&!'!(!)!*!+!,!-!.!/(B - ("$(5!!(B" . "$,15A(B") - ("$(5!"(B" . "$,15B(B") - ("$(5!#(B" . "$,15C(B") - ("$(5!$(B" . "$,15E(B") - ("$(5!%(B" . "$,15F(B") - ("$(5!&(B" . "$,15G(B") - ("$(5!'(B" . "$,15H(B") - ("$(5!((B" . "$,15I(B") - ("$(5!)(B" . "$,15J(B") - ("$(5!*(B" . "$,15K(B") - ("$(5!*"p(B" . "$,15p6#(B") - ("$(5!+(B" . "$,15N(B") - ("$(5!,(B" . "$,15O(B") - ("$(5!-(B" . "$,15P(B") - ("$(5!.(B" . "$,15M(B") - ("$(5!/(B" . "$,15R(B") + ;;2120 $(6!!!"!#!$!%!&!'!(!)!*!+!,!-!.!/(B + ("$(6!!(B" . "$,15A(B") + ("$(6!"(B" . "$,15B(B") + ("$(6!#(B" . "$,15C(B") + ("$(6!$(B" . "$,15E(B") + ("$(6!%(B" . "$,15F(B") + ("$(6!&(B" . "$,15G(B") + ("$(6!'(B" . "$,15H(B") + ("$(6!((B" . "$,15I(B") + ("$(6!)(B" . "$,15J(B") + ("$(6!*(B" . "$,15K(B") + ("$(6!*"p(B" . "$,15p6#(B") + ("$(6!+(B" . "$,15N(B") + ("$(6!,(B" . "$,15O(B") + ("$(6!-(B" . "$,15P(B") + ("$(6!.(B" . "$,15M(B") + ("$(6!/(B" . "$,15R(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2130 $(5!0!1!2!3!4!5!6!7!8!9!:!;!<!=!>!?(B - ("$(5!0(B" . "$,15S(B") - ("$(5!1(B" . "$,15T(B") - ("$(5!2(B" . "$,15Q(B") - ("$(5!3(B" . "$,15U(B") - ("$(5!4(B" . "$,15V(B") - ("$(5!5(B" . "$,15W(B") - ("$(5!6(B" . "$,15X(B") - ("$(5!7(B" . "$,15Y(B") - ("$(5!8(B" . "$,15Z(B") - ("$(5!9(B" . "$,15[(B") - ("$(5!:(B" . "$,15\(B") - ("$(5!;(B" . "$,15](B") - ("$(5!<(B" . "$,15^(B") - ("$(5!=(B" . "$,15_(B") - ("$(5!>(B" . "$,15`(B") - ("$(5!?(B" . "$,15a(B") + ;;2130 $(6!0!1!2!3!4!5!6!7!8!9!:!;!<!=!>!?(B + ("$(6!0(B" . "$,15S(B") + ("$(6!1(B" . "$,15T(B") + ("$(6!2(B" . "$,15Q(B") + ("$(6!3(B" . "$,15U(B") + ("$(6!4(B" . "$,15V(B") + ("$(6!5(B" . "$,15W(B") + ("$(6!6(B" . "$,15X(B") + ("$(6!7(B" . "$,15Y(B") + ("$(6!8(B" . "$,15Z(B") + ("$(6!9(B" . "$,15[(B") + ("$(6!:(B" . "$,15\(B") + ("$(6!;(B" . "$,15](B") + ("$(6!<(B" . "$,15^(B") + ("$(6!=(B" . "$,15_(B") + ("$(6!>(B" . "$,15`(B") + ("$(6!?(B" . 
"$,15a(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2140 $(5!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O(B - ("$(5!@(B" . "$,15b(B") - ("$(5!A(B" . "$,15c(B") - ("$(5!B(B" . "$,15d(B") - ("$(5!C(B" . "$,15e(B") - ("$(5!D(B" . "$,15f(B") - ("$(5!E(B" . "$,15g(B") - ("$(5!F(B" . "$,15h(B") - ("$(5!G(B" . "$,15i(B") - ("$(5!H(B" . "$,15j(B") - ("$(5!I(B" . "$,15k(B") - ("$(5!J(B" . "$,15l(B") - ("$(5!K(B" . "$,15m(B") - ("$(5!L(B" . "$,15n(B") - ("$(5!M(B" . "$,15o(B") - ("$(5!N(B" . "$,16?(B") - ("$(5!O(B" . "$,15p(B") + ;;2140 $(6!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O(B + ("$(6!@(B" . "$,15b(B") + ("$(6!A(B" . "$,15c(B") + ("$(6!B(B" . "$,15d(B") + ("$(6!C(B" . "$,15e(B") + ("$(6!D(B" . "$,15f(B") + ("$(6!E(B" . "$,15g(B") + ("$(6!F(B" . "$,15h(B") + ("$(6!G(B" . "$,15i(B") + ("$(6!H(B" . "$,15j(B") + ("$(6!I(B" . "$,15k(B") + ("$(6!J(B" . "$,15l(B") + ("$(6!K(B" . "$,15m(B") + ("$(6!L(B" . "$,15n(B") + ("$(6!M(B" . "$,15o(B") + ("$(6!N(B" . "$,16?(B") + ("$(6!O(B" . "$,15p(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2150 $(5!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_(B - ("$(5!P(B" . "$,15q(B") - ("$(5!Q(B" . "$,15r(B") - ("$(5!R(B" . "$,15s(B") - ("$(5!S(B" . "$,15t(B") - ("$(5!T(B" . "$,15u(B") - ("$(5!U(B" . "$,15v(B") - ("$(5!V(B" . "$,15w(B") - ("$(5!W(B" . "$,15x(B") - ("$(5!X(B" . "$,15y(B") - ("$(5!Z(B" . "$,15~(B") - ("$(5![(B" . "$,15(B") - ("$(5!\(B" . "$,16 (B") - ("$(5!](B" . "$,16!(B") - ("$(5!^(B" . "$,16"(B") - ("$(5!_(B" . "$,16#(B") + ;;2150 $(6!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_(B + ("$(6!P(B" . "$,15q(B") + ("$(6!Q(B" . "$,15r(B") + ("$(6!R(B" . "$,15s(B") + ("$(6!S(B" . "$,15t(B") + ("$(6!T(B" . "$,15u(B") + ("$(6!U(B" . "$,15v(B") + ("$(6!V(B" . "$,15w(B") + ("$(6!W(B" . "$,15x(B") + ("$(6!X(B" . "$,15y(B") + ("$(6!Z(B" . "$,15~(B") + ("$(6![(B" . "$,15(B") + ("$(6!\(B" . "$,16 (B") + ("$(6!](B" . "$,16!(B") + ("$(6!^(B" . "$,16"(B") + ("$(6!_(B" . "$,16#(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2160 $(5!`!a!b!c!d!e!f!g!h!i!j!k!l!m!n!o(B - ("$(5!`(B" . 
"$,16&(B") - ("$(5!a(B" . "$,16'(B") - ("$(5!b(B" . "$,16((B") - ("$(5!c(B" . "$,16%(B") - ("$(5!d(B" . "$,16*(B") - ("$(5!e(B" . "$,16+(B") - ("$(5!f(B" . "$,16,(B") - ("$(5!g(B" . "$,16)(B") - ("$(5!h(B" . "$,16-(B") - ("$(5!i(B" . "$,15|(B") - ("$(5!j(B" . "$,16D(B") - ("$(5!j!j(B" . "$,16E(B") + ;;2160 $(6!`!a!b!c!d!e!f!g!h!i!j!k!l!m!n!o(B + ("$(6!`(B" . "$,16&(B") + ("$(6!a(B" . "$,16'(B") + ("$(6!b(B" . "$,16((B") + ("$(6!c(B" . "$,16%(B") + ("$(6!d(B" . "$,16*(B") + ("$(6!e(B" . "$,16+(B") + ("$(6!f(B" . "$,16,(B") + ("$(6!g(B" . "$,16)(B") + ("$(6!h(B" . "$,16-(B") + ("$(6!i(B" . "$,15|(B") + ("$(6!j(B" . "$,16D(B") + ("$(6!j!j(B" . "$,16E(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2170 $(5!p!q!r!s!t!u!v!w!x!y!z!{!|!}!~(B - ("$(5!q(B" . "$,16F(B") - ("$(5!r(B" . "$,16G(B") - ("$(5!s(B" . "$,16H(B") - ("$(5!t(B" . "$,16I(B") - ("$(5!u(B" . "$,16J(B") - ("$(5!v(B" . "$,16K(B") - ("$(5!w(B" . "$,16L(B") - ("$(5!x(B" . "$,16M(B") - ("$(5!y(B" . "$,16N(B") - ("$(5!z(B" . "$,16O(B") + ;;2170 $(6!p!q!r!s!t!u!v!w!x!y!z!{!|!}!~(B + ("$(6!q(B" . "$,16F(B") + ("$(6!r(B" . "$,16G(B") + ("$(6!s(B" . "$,16H(B") + ("$(6!t(B" . "$,16I(B") + ("$(6!u(B" . "$,16J(B") + ("$(6!v(B" . "$,16K(B") + ("$(6!w(B" . "$,16L(B") + ("$(6!x(B" . "$,16M(B") + ("$(6!y(B" . "$,16N(B") + ("$(6!z(B" . "$,16O(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2220 $(5"!"""#"$"%"&"'"(")"*"+","-"."/(B - ("$(5"!(B" . "$,16;6-5p(B") - ("$(5""(B" . "$,16>6-5p(B") - ("$(5"#(B" . "$,15U6-5p(B") - ("$(5"$(B" . "$,15W6-5p(B") - ("$(5"%(B" . "$,15d6-5p(B") - ("$(5"&(B" . "$,15j6-5p(B") - ("$(5"'(B" . "$,15k6-5p(B") - ("$(5")(B" . "$,15v6-5p(B") - ("$(5",(B" . "$,15p6!(B") - ("$(5"-(B" . "$,15p6"(B") - ("$(5".(B" . "$,15q6!(B") - ("$(5"/(B" . "$,15q6"(B") + ;;2220 $(6"!"""#"$"%"&"'"(")"*"+","-"."/(B + ("$(6"!(B" . "$,16;6-5p(B") + ("$(6""(B" . "$,16>6-5p(B") + ("$(6"#(B" . "$,15U6-5p(B") + ("$(6"$(B" . "$,15W6-5p(B") + ("$(6"%(B" . "$,15d6-5p(B") + ("$(6"&(B" . "$,15j6-5p(B") + ("$(6"'(B" . 
"$,15k6-5p(B") + ("$(6")(B" . "$,15v6-5p(B") + ("$(6",(B" . "$,15p6!(B") + ("$(6"-(B" . "$,15p6"(B") + ("$(6".(B" . "$,15q6!(B") + ("$(6"/(B" . "$,15q6"(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2230 $(5"0"1"2"3"4"5"6"7"8"9":";"<"=">"?(B - ("$(5"3(B" . "$,15U6-(B") - ("$(5"4(B" . "$,15V6-(B") - ("$(5"5(B" . "$,15W6-(B") - ("$(5"6(B" . "$,15X6-(B") - ("$(5"8(B" . "$,15Z6-(B") - ("$(5"8"q(B" . "$,15Z6-5p6-(B") - ("$(5":(B" . "$,15\6-(B") - ("$(5";(B" . "$,15]6-(B") - ("$(5"<(B" . "$,15^6-(B") - ("$(5"<(B" . "$,15^6-(B") + ;;2230 $(6"0"1"2"3"4"5"6"7"8"9":";"<"=">"?(B + ("$(6"3(B" . "$,15U6-(B") + ("$(6"4(B" . "$,15V6-(B") + ("$(6"5(B" . "$,15W6-(B") + ("$(6"6(B" . "$,15X6-(B") + ("$(6"8(B" . "$,15Z6-(B") + ("$(6"8"q(B" . "$,15Z6-5p6-(B") + ("$(6":(B" . "$,15\6-(B") + ("$(6";(B" . "$,15]6-(B") + ("$(6"<(B" . "$,15^6-(B") + ("$(6"<(B" . "$,15^6-(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2240 $(5"@"A"B"C"D"E"F"G"H"I"J"K"L"M"N"O(B - ("$(5"A(B" . "$,15c6-(B") - ("$(5"B(B" . "$,15d6-(B") - ("$(5"C(B" . "$,15e6-(B") - ("$(5"E(B" . "$,15g6-(B") - ("$(5"F(B" . "$,15h6-(B") - ("$(5"G(B" . "$,15i6-(B") - ("$(5"H(B" . "$,15j6-(B") - ("$(5"I(B" . "$,15k6-(B") - ("$(5"J(B" . "$,15l6-(B") - ("$(5"J(B" . "$,15l6-(B") - ("$(5"K(B" . "$,15m6-(B") - ("$(5"L(B" . "$,15n6-(B") - ("$(5"M(B" . "$,15o6-(B") - ("$(5"N(B" . "$,16?6-(B") + ;;2240 $(6"@"A"B"C"D"E"F"G"H"I"J"K"L"M"N"O(B + ("$(6"A(B" . "$,15c6-(B") + ("$(6"B(B" . "$,15d6-(B") + ("$(6"C(B" . "$,15e6-(B") + ("$(6"E(B" . "$,15g6-(B") + ("$(6"F(B" . "$,15h6-(B") + ("$(6"G(B" . "$,15i6-(B") + ("$(6"H(B" . "$,15j6-(B") + ("$(6"I(B" . "$,15k6-(B") + ("$(6"J(B" . "$,15l6-(B") + ("$(6"J(B" . "$,15l6-(B") + ("$(6"K(B" . "$,15m6-(B") + ("$(6"L(B" . "$,15n6-(B") + ("$(6"M(B" . "$,15o6-(B") + ("$(6"N(B" . "$,16?6-(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2250 $(5"P"Q"R"S"T"U"V"W"X"Y"Z"["\"]"^"_(B - ("$(5"Q(B" . "$,15r6-(B") - ("$(5"R(B" . "$,15s6-(B") - ("$(5"S(B" . "$,15t6-(B") - ("$(5"T(B" . "$,15u6-(B") - ("$(5"U(B" . 
"$,15v6-(B") - ("$(5"V(B" . "$,15w6-(B") - ("$(5"W(B" . "$,15x6-(B") - ("$(5"](B" . "$,16-5o(B") + ;;2250 $(6"P"Q"R"S"T"U"V"W"X"Y"Z"["\"]"^"_(B + ("$(6"Q(B" . "$,15r6-(B") + ("$(6"R(B" . "$,15s6-(B") + ("$(6"S(B" . "$,15t6-(B") + ("$(6"T(B" . "$,15u6-(B") + ("$(6"U(B" . "$,15v6-(B") + ("$(6"V(B" . "$,15w6-(B") + ("$(6"W(B" . "$,15x6-(B") + ("$(6"](B" . "$,16-5o(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2260 $(5"`"a"b"c"d"e"f"g"h"i"j"k"l"m"n"o(B - ("$(5"`(B" . "$,15W6-5p6-(B") - ("$(5"a(B" . "$,15X6-5h6-(B") - ("$(5"c(B" . "$,15d6-5d6-(B") - ("$(5"d(B" . "$,15d6-5p6-(B") - ("$(5"e(B" . "$,15g6-5h6-(B") - ("$(5"f(B" . "$,15g6-5p6-(B") - ("$(5"g(B" . "$,15j6-5d6-(B") - ("$(5"h(B" . "$,15v6-5Z6-(B") - ("$(5"i(B" . "$,15v6-5p6-(B") - ("$(5"j(B" . "$,15v6-5u6-(B") - ("$(5"k(B" . "$,15h6-5h6-(B") - ("$(5"l(B" . "$,15U6-5w6-(B") - ("$(5"m(B" . "$,15\6-5^6-(B") + ;;2260 $(6"`"a"b"c"d"e"f"g"h"i"j"k"l"m"n"o(B + ("$(6"`(B" . "$,15W6-5p6-(B") + ("$(6"a(B" . "$,15X6-5h6-(B") + ("$(6"c(B" . "$,15d6-5d6-(B") + ("$(6"d(B" . "$,15d6-5p6-(B") + ("$(6"e(B" . "$,15g6-5h6-(B") + ("$(6"f(B" . "$,15g6-5p6-(B") + ("$(6"g(B" . "$,15j6-5d6-(B") + ("$(6"h(B" . "$,15v6-5Z6-(B") + ("$(6"i(B" . "$,15v6-5p6-(B") + ("$(6"j(B" . "$,15v6-5u6-(B") + ("$(6"k(B" . "$,15h6-5h6-(B") + ("$(6"l(B" . "$,15U6-5w6-(B") + ("$(6"m(B" . "$,15\6-5^6-(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2270 $(5"p"q"r"s"t"u"v"w"x"y"z"{"|"}"~(B - ("$(5"p(B" . "$,15p6-(B") - ("$(5"q(B" . "$,16-5p(B") - ("$(5"r(B" . "$,16-5p(B") - ("$(5"s(B" . "$,1686-(B") - ("$(5"t(B" . "$,1696-(B") - ("$(5"u(B" . "$,16:6-(B") - ("$(5"y(B" . "$,16>6-(B") - ("$(5"z(B" . "$,16;6-(B") + ;;2270 $(6"p"q"r"s"t"u"v"w"x"y"z"{"|"}"~(B + ("$(6"p(B" . "$,15p6-(B") + ("$(6"q(B" . "$,16-5p(B") + ("$(6"r(B" . "$,16-5p(B") + ("$(6"s(B" . "$,1686-(B") + ("$(6"t(B" . "$,1696-(B") + ("$(6"u(B" . "$,16:6-(B") + ("$(6"y(B" . "$,16>6-(B") + ("$(6"z(B" . "$,16;6-(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2320 $(5#!#"###$#%#&#'#(#)#*#+#,#-#.#/(B - ("$(5#!(B" . 
"$,160(B") - ("$(5#&(B" . "$,15L(B") - ("$(5#&"p(B" . "$,15p6$(B") - ("$(5#'(B" . "$,16A(B") - ("$(5#'"p(B" . "$,15p6C(B") - ("$(5#*(B" . "$,16@(B") - ("$(5#*"p(B" . "$,15p6B(B") + ;;2320 $(6#!#"###$#%#&#'#(#)#*#+#,#-#.#/(B + ("$(6#!(B" . "$,160(B") + ("$(6#&(B" . "$,15L(B") + ("$(6#&"p(B" . "$,15p6$(B") + ("$(6#'(B" . "$,16A(B") + ("$(6#'"p(B" . "$,15p6C(B") + ("$(6#*(B" . "$,16@(B") + ("$(6#*"p(B" . "$,15p6B(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2330 $(5#0#1#2#3#4#5#6#7#8#9#:#;#<#=#>#?(B - ("$(5#3(B" . "$,168(B") - ("$(5#4(B" . "$,169(B") - ("$(5#5(B" . "$,16:(B") - ("$(5#:(B" . "$,16;(B") - ("$(5#?(B" . "$,16<(B") + ;;2330 $(6#0#1#2#3#4#5#6#7#8#9#:#;#<#=#>#?(B + ("$(6#3(B" . "$,168(B") + ("$(6#4(B" . "$,169(B") + ("$(6#5(B" . "$,16:(B") + ("$(6#:(B" . "$,16;(B") + ("$(6#?(B" . "$,16<(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2340 $(5#@#A#B#C#D#E#F#G#H#I#J#K#L#M#N#O(B - ("$(5#@(B" . "$,16=(B") - ("$(5#I(B" . "$,16>(B") - ("$(5#J(B" . "$,15}(B") - ("$(5#K(B" . "$,16$(B") - ("$(5#L(B" . "$,16B(B") - ("$(5#M(B" . "$,16C(B") + ;;2340 $(6#@#A#B#C#D#E#F#G#H#I#J#K#L#M#N#O(B + ("$(6#@(B" . "$,16=(B") + ("$(6#I(B" . "$,16>(B") + ("$(6#J(B" . "$,15}(B") + ("$(6#K(B" . "$,16$(B") + ("$(6#L(B" . "$,16B(B") + ("$(6#M(B" . "$,16C(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2350 $(5#P#Q#R#S#T#U#V#W#X#Y#Z#[#\#]#^#_(B - ("$(5#P(B" . "$,15n6-5h(B") - ("$(5#Q(B" . "$,15n6-5r(B") - ("$(5#R(B" . "$,15y6#(B") + ;;2350 $(6#P#Q#R#S#T#U#V#W#X#Y#Z#[#\#]#^#_(B + ("$(6#P(B" . "$,15n6-5h(B") + ("$(6#Q(B" . "$,15n6-5r(B") + ("$(6#R(B" . "$,15y6#(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2360 $(5#`#a#b#c#d#e#f#g#h#i#j#k#l#m#n#o(B - ("$(5#`(B" . "$,15r6-5r(B") - ("$(5#a(B" . "$,15u6-5h(B") - ("$(5#b(B" . "$,15u6-5u(B") - ("$(5#c(B" . "$,15v6-5Z(B") - ("$(5#d(B" . "$,15v6-5h(B") - ("$(5#e(B" . "$,15v6-5l(B") - ("$(5#f(B" . "$,15v6-5r(B") - ("$(5#g(B" . "$,15v6-5u(B") - ("$(5#h(B" . "$,15w6-5_6-5p6-5o(B") - ("$(5#i(B" . "$,15w6-5_6-5o(B") - ("$(5#j(B" . "$,15w6-5_6-5u(B") - ("$(5#k(B" . 
"$,15w6-5_(B") - ("$(5#l(B" . "$,15w6-5`(B") - ("$(5#m(B" . "$,15x6-5h(B") - ("$(5#n(B" . "$,15x6-5p(B") + ;;2360 $(6#`#a#b#c#d#e#f#g#h#i#j#k#l#m#n#o(B + ("$(6#`(B" . "$,15r6-5r(B") + ("$(6#a(B" . "$,15u6-5h(B") + ("$(6#b(B" . "$,15u6-5u(B") + ("$(6#c(B" . "$,15v6-5Z(B") + ("$(6#d(B" . "$,15v6-5h(B") + ("$(6#e(B" . "$,15v6-5l(B") + ("$(6#f(B" . "$,15v6-5r(B") + ("$(6#g(B" . "$,15v6-5u(B") + ("$(6#h(B" . "$,15w6-5_6-5p6-5o(B") + ("$(6#i(B" . "$,15w6-5_6-5o(B") + ("$(6#j(B" . "$,15w6-5_6-5u(B") + ("$(6#k(B" . "$,15w6-5_(B") + ("$(6#l(B" . "$,15w6-5`(B") + ("$(6#m(B" . "$,15x6-5h(B") + ("$(6#n(B" . "$,15x6-5p(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2370 $(5#p#q#r#s#t#u#v#w#x#y#z#{#|#}#~(B - ("$(5#p(B" . "$,15y6-5c(B") - ("$(5#q(B" . "$,15y6-5h(B") - ("$(5#r(B" . "$,15y6-5n(B") - ("$(5#s(B" . "$,15y6-5o(B") - ("$(5#t(B" . "$,15y6-5p(B") - ("$(5#u(B" . "$,15y6-5r(B") - ("$(5#v(B" . "$,15y6-5u(B") + ;;2370 $(6#p#q#r#s#t#u#v#w#x#y#z#{#|#}#~(B + ("$(6#p(B" . "$,15y6-5c(B") + ("$(6#q(B" . "$,15y6-5h(B") + ("$(6#r(B" . "$,15y6-5n(B") + ("$(6#s(B" . "$,15y6-5o(B") + ("$(6#t(B" . "$,15y6-5p(B") + ("$(6#u(B" . "$,15y6-5r(B") + ("$(6#v(B" . "$,15y6-5u(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2420 $(5$!$"$#$$$%$&$'$($)$*$+$,$-$.$/(B - ("$(5$!(B" . "$,15U6-5d6-5p6-5o(B") - ("$(5$"(B" . "$,15U6-5d6-5u(B") - ("$(5$#(B" . "$,15U6-5d6-5o(B") - ("$(5$$(B" . "$,15U6-5h6-5o(B") - ("$(5$%(B" . "$,15U6-5p6-5o(B") - ("$(5$&(B" . "$,15U6-5u6-5o(B") - ("$(5$'(B" . "$,15U6-5U(B") - ("$(5$((B" . "$,15U6-5d(B") - ("$(5$)(B" . "$,15U6-5h(B") - ("$(5$*(B" . "$,15U6-5n(B") - ("$(5$+(B" . "$,15U6-5o(B") - ("$(5$,(B" . "$,15U6-5r(B") - ("$(5$-(B" . "$,15U6-5u(B") - ("$(5$.(B" . "$,15U6-5w(B") - ("$(5$/(B" . "$,15X6-5h(B") + ;;2420 $(6$!$"$#$$$%$&$'$($)$*$+$,$-$.$/(B + ("$(6$!(B" . "$,15U6-5d6-5p6-5o(B") + ("$(6$"(B" . "$,15U6-5d6-5u(B") + ("$(6$#(B" . "$,15U6-5d6-5o(B") + ("$(6$$(B" . "$,15U6-5h6-5o(B") + ("$(6$%(B" . "$,15U6-5p6-5o(B") + ("$(6$&(B" . "$,15U6-5u6-5o(B") + ("$(6$'(B" . 
"$,15U6-5U(B") + ("$(6$((B" . "$,15U6-5d(B") + ("$(6$)(B" . "$,15U6-5h(B") + ("$(6$*(B" . "$,15U6-5n(B") + ("$(6$+(B" . "$,15U6-5o(B") + ("$(6$,(B" . "$,15U6-5r(B") + ("$(6$-(B" . "$,15U6-5u(B") + ("$(6$.(B" . "$,15U6-5w(B") + ("$(6$/(B" . "$,15X6-5h(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2430 $(5$0$1$2$3$4$5$6$7$8$9$:$;$<$=$>$?(B - ("$(5$0(B" . "$,15Y6-5U6-5d6-5o(B") - ("$(5$1(B" . "$,15Y6-5U6-5w6-5u(B") - ("$(5$2(B" . "$,15Y6-5U6-5d(B") - ("$(5$3(B" . "$,15Y6-5U6-5w(B") - ("$(5$4(B" . "$,15Y6-5X6-5p(B") - ("$(5$5(B" . "$,15Y6-5U6-5o(B") - ("$(5$6(B" . "$,15Y6-5V6-5o(B") - ("$(5$7(B" . "$,15Y6-5W6-5o(B") - ("$(5$8(B" . "$,15Y6-5X6-5o(B") - ("$(5$9(B" . "$,15Y6-5U(B") - ("$(5$:(B" . "$,15Y6-5V(B") - ("$(5$;(B" . "$,15Y6-5W(B") - ("$(5$<(B" . "$,15Y6-5X(B") - ("$(5$=(B" . "$,15Y6-5Y(B") - ("$(5$>(B" . "$,15Y6-5h(B") - ("$(5$?(B" . "$,15Y6-5n(B") + ;;2430 $(6$0$1$2$3$4$5$6$7$8$9$:$;$<$=$>$?(B + ("$(6$0(B" . "$,15Y6-5U6-5d6-5o(B") + ("$(6$1(B" . "$,15Y6-5U6-5w6-5u(B") + ("$(6$2(B" . "$,15Y6-5U6-5d(B") + ("$(6$3(B" . "$,15Y6-5U6-5w(B") + ("$(6$4(B" . "$,15Y6-5X6-5p(B") + ("$(6$5(B" . "$,15Y6-5U6-5o(B") + ("$(6$6(B" . "$,15Y6-5V6-5o(B") + ("$(6$7(B" . "$,15Y6-5W6-5o(B") + ("$(6$8(B" . "$,15Y6-5X6-5o(B") + ("$(6$9(B" . "$,15Y6-5U(B") + ("$(6$:(B" . "$,15Y6-5V(B") + ("$(6$;(B" . "$,15Y6-5W(B") + ("$(6$<(B" . "$,15Y6-5X(B") + ("$(6$=(B" . "$,15Y6-5Y(B") + ("$(6$>(B" . "$,15Y6-5h(B") + ("$(6$?(B" . "$,15Y6-5n(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2440 $(5$@$A$B$C$D$E$F$G$H$I$J$K$L$M$N$O(B - ("$(5$@(B" . "$,15Y6-5o(B") - ("$(5$A(B" . "$,15Z6-5Z(B") - ("$(5$B(B" . "$,15Z6-5^(B") - ("$(5$C(B" . "$,15[6-5o(B") - ("$(5$D(B" . "$,15\6-5p(B") - ("$(5$E(B" . "$,15\6-5^(B") - ("$(5$F(B" . "$,15^6-5Z(B") - ("$(5$G(B" . "$,15^6-5\(B") - ("$(5$H(B" . "$,15_6-5U(B") - ("$(5$I(B" . "$,15_6-5_(B") - ("$(5$J(B" . "$,15_6-5`(B") - ("$(5$K(B" . "$,15_6-5o(B") - ("$(5$L(B" . "$,15`6-5o(B") - ("$(5$M(B" . "$,15a6-5W6-5o(B") - ("$(5$N(B" . "$,15a6-5X6-5p(B") - ("$(5$O(B" . 
"$,15a6-5p6-5o(B") + ;;2440 $(6$@$A$B$C$D$E$F$G$H$I$J$K$L$M$N$O(B + ("$(6$@(B" . "$,15Y6-5o(B") + ("$(6$A(B" . "$,15Z6-5Z(B") + ("$(6$B(B" . "$,15Z6-5^(B") + ("$(6$C(B" . "$,15[6-5o(B") + ("$(6$D(B" . "$,15\6-5p(B") + ("$(6$E(B" . "$,15\6-5^(B") + ("$(6$F(B" . "$,15^6-5Z(B") + ("$(6$G(B" . "$,15^6-5\(B") + ("$(6$H(B" . "$,15_6-5U(B") + ("$(6$I(B" . "$,15_6-5_(B") + ("$(6$J(B" . "$,15_6-5`(B") + ("$(6$K(B" . "$,15_6-5o(B") + ("$(6$L(B" . "$,15`6-5o(B") + ("$(6$M(B" . "$,15a6-5W6-5o(B") + ("$(6$N(B" . "$,15a6-5X6-5p(B") + ("$(6$O(B" . "$,15a6-5p6-5o(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2450 $(5$P$Q$R$S$T$U$V$W$X$Y$Z$[$\$]$^$_(B - ("$(5$P(B" . "$,15a6-5W(B") - ("$(5$Q(B" . "$,15a6-5X(B") - ("$(5$R(B" . "$,15a6-5a(B") - ("$(5$S(B" . "$,15a6-5n(B") - ("$(5$T(B" . "$,15a6-5o(B") + ;;2450 $(6$P$Q$R$S$T$U$V$W$X$Y$Z$[$\$]$^$_(B + ("$(6$P(B" . "$,15a6-5W(B") + ("$(6$Q(B" . "$,15a6-5X(B") + ("$(6$R(B" . "$,15a6-5a(B") + ("$(6$S(B" . "$,15a6-5n(B") + ("$(6$T(B" . "$,15a6-5o(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2460 $(5$`$a$b$c$d$e$f$g$h$i$j$k$l$m$n$o(B - ("$(5$`(B" . "$,15b6-5o(B") - ("$(5$a(B" . "$,15d6-5d(B") - ("$(5$b(B" . "$,15d6-5h(B") - ("$(5$c(B" . "$,15f6-5f6-5o(B") - ("$(5$d(B" . "$,15f6-5g6-5o(B") - ("$(5$e(B" . "$,15f6-5m6-5o(B") - ("$(5$f(B" . "$,15f6-5p6-5o(B") - ("$(5$g(B" . "$,15f6-5u6-5o(B") - ("$(5$h(B" . "$,15f6-5W6-5p(B") - ("$(5$i(B" . "$,15f6-5X6-5p(B") - ("$(5$j(B" . "$,15f6-5f6-5u(B") - ("$(5$k(B" . "$,15f6-5g6-5u(B") - ("$(5$l(B" . "$,15f6-5W(B") - ("$(5$m(B" . "$,15f6-5X(B") - ("$(5$n(B" . "$,15f6-5f(B") - ("$(5$o(B" . "$,15f6-5g(B") + ;;2460 $(6$`$a$b$c$d$e$f$g$h$i$j$k$l$m$n$o(B + ("$(6$`(B" . "$,15b6-5o(B") + ("$(6$a(B" . "$,15d6-5d(B") + ("$(6$b(B" . "$,15d6-5h(B") + ("$(6$c(B" . "$,15f6-5f6-5o(B") + ("$(6$d(B" . "$,15f6-5g6-5o(B") + ("$(6$e(B" . "$,15f6-5m6-5o(B") + ("$(6$f(B" . "$,15f6-5p6-5o(B") + ("$(6$g(B" . "$,15f6-5u6-5o(B") + ("$(6$h(B" . "$,15f6-5W6-5p(B") + ("$(6$i(B" . "$,15f6-5X6-5p(B") + ("$(6$j(B" . 
"$,15f6-5f6-5u(B") + ("$(6$k(B" . "$,15f6-5g6-5u(B") + ("$(6$l(B" . "$,15f6-5W(B") + ("$(6$m(B" . "$,15f6-5X(B") + ("$(6$n(B" . "$,15f6-5f(B") + ("$(6$o(B" . "$,15f6-5g(B") ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f - ;;2470 $(5$p$q$r$s$t$u$v$w$x$y$z${$|$}$~(B - ("$(5$p(B" . "$,15f6-5h(B") - ("$(5$q(B" . "$,15f6-5l(B") - ("$(5$r(B" . "$,15f6-5m(B") - ("$(5$s(B" . "$,15f6-5n(B") - ("$(5$t(B" . "$,15f6-5o(B") - ("$(5$u(B" . "$,15f6-5u(B") - ("$(5$v(B" . "$,15g6-5h(B") - ("$(5$w(B" . "$,15h6-5h(B") - ("$(5$x(B" . "$,15j6-5d(B") - ("$(5$y(B" . "$,15j6-5h(B") - ("$(5$z(B" . "$,15j6-5r(B") - ("$(5${(B" . "$,15l6-5h(B") - ("$(5$|(B" . "$,15l6-5l(B") - ("$(5$}(B" . "$,15l6-5u(B") - ("$(5$~(B" . "$,15m6-5h(B"))) + ;;2470 $(6$p$q$r$s$t$u$v$w$x$y$z${$|$}$~(B + ("$(6$p(B" . "$,15f6-5h(B") + ("$(6$q(B" . "$,15f6-5l(B") + ("$(6$r(B" . "$,15f6-5m(B") + ("$(6$s(B" . "$,15f6-5n(B") + ("$(6$t(B" . "$,15f6-5o(B") + ("$(6$u(B" . "$,15f6-5u(B") + ("$(6$v(B" . "$,15g6-5h(B") + ("$(6$w(B" . "$,15h6-5h(B") + ("$(6$x(B" . "$,15j6-5d(B") + ("$(6$y(B" . "$,15j6-5h(B") + ("$(6$z(B" . "$,15j6-5r(B") + ("$(6${(B" . "$,15l6-5h(B") + ("$(6$|(B" . "$,15l6-5l(B") + ("$(6$}(B" . "$,15l6-5u(B") + ("$(6$~(B" . "$,15m6-5h(B"))) (defconst indian-2-column-to-ucs-regexp - "$(5!j!j(B\\|$(5"8"q(B\\|[$(5#&#'!*#*(B]$(5"p(B\\|[$(5!!(B-$(5$~(B]") + "$(6!j!j(B\\|$(6"8"q(B\\|[$(6#&#'!*#*(B]$(6"p(B\\|[$(6!!(B-$(6$~(B]") (put 'indian-2-column-to-ucs-chartable 'char-table-extra-slots 1) (defconst indian-2-column-to-ucs-chartable (let ((table (make-char-table 'indian-2-column-to-ucs-chartable)) - (alist nil)) + (alist nil)) (dolist (elt indian-2-colum-to-ucs) (if (= (length (car elt)) 1) - (aset table (aref (car elt) 0) (cdr elt)) - (setq alist (cons elt alist)))) + (aset table (aref (car elt) 0) (cdr elt)) + (setq alist (cons elt alist)))) (set-char-table-extra-slot table 0 alist) table)) +;;;###autoload (defun indian-2-column-to-ucs-region (from to) "Convert old Emacs Devanagari characters to UCS." 
(interactive "r") (save-excursion (save-restriction (let ((pos from) - (alist (char-table-extra-slot indian-2-column-to-ucs-chartable 0))) - (narrow-to-region from to) - (decompose-region from to) - (goto-char (point-min)) - (while (re-search-forward indian-2-column-to-ucs-regexp nil t) - (let ((len (- (match-end 0) (match-beginning 0))) - subst) - (if (= len 1) - (setq subst (aref indian-2-column-to-ucs-chartable + (alist (char-table-extra-slot indian-2-column-to-ucs-chartable 0))) + (narrow-to-region from to) + (decompose-region from to) + (goto-char (point-min)) + (while (re-search-forward indian-2-column-to-ucs-regexp nil t) + (let ((len (- (match-end 0) (match-beginning 0))) + subst) + (if (= len 1) + (setq subst (aref indian-2-column-to-ucs-chartable (char-after (match-beginning 0)))) - (setq subst (cdr (assoc (match-string 0) alist)))) - (replace-match (if subst subst "?")))) - (indian-compose-region (point-min) (point-max)))))) - -;;;###autoload -(defun indian-glyph-char (index &optional script) - "Return character of charset `indian-glyph' made from glyph index INDEX. -The variable `indian-default-script' specifies the script of the glyph. -Optional argument SCRIPT, if non-nil, overrides `indian-default-script'. -See also the function `indian-char-glyph'." - (or script - (setq script indian-default-script)) - (let ((offset (get script 'indian-glyph-code-offset))) - (or (integerp offset) - (error "Invalid script name: %s" script)) - (or (and (>= index 0) (< index 256)) - (error "Invalid glyph index: %d" index)) - (setq index (+ offset index)) - (make-char 'indian-glyph (+ (/ index 96) 32) (+ (% index 96) 32)))) - -(defvar indian-glyph-max-char - (indian-glyph-char - 255 (aref indian-script-table (1- (length indian-script-table)))) - "The maximum valid code of characters in the charset `indian-glyph'.") - -;;;###autoload -(defun indian-char-glyph (char) - "Return information about the glyph code for CHAR of `indian-glyph' charset. -The value is (INDEX . 
SCRIPT), where INDEX is the glyph index -in the font that Indian script name SCRIPT specifies. -See also the function `indian-glyph-char'." - (let ((split (split-char char)) - code) - (or (eq (car split) 'indian-glyph) - (error "Charset of `%c' is not indian-glyph" char)) - (or (<= char indian-glyph-max-char) - (error "Invalid indian-glyph char: %d" char)) - (setq code (+ (* (- (nth 1 split) 32) 96) (nth 2 split) -32)) - (cons (% code 256) (aref indian-script-table (/ code 256))))) + (setq subst (cdr (assoc (match-string 0) alist)))) + (replace-match (if subst subst "?")))) + (indian-compose-region (point-min) (point-max)))))) (provide 'ind-util) diff --git a/lisp/language/indian.el b/lisp/language/indian.el index 47803f22342..d7b4c365bc8 100644 --- a/lisp/language/indian.el +++ b/lisp/language/indian.el @@ -29,21 +29,16 @@ ;;; Code: -(make-coding-system - 'in-is13194 2 ?D - "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)." - '(ascii indian-is13194 nil nil - nil ascii-eol) - `((safe-chars . ,(let ((table (make-char-table 'safe-chars nil))) - (set-char-table-range table 'indian-is13194 t) - (dotimes (i 127) - (aset table i t) - (aset table (decode-char 'ucs (+ #x900 i)) t)) - table)) - (post-read-conversion . in-is13194-post-read-conversion) - (pre-write-conversion . in-is13194-pre-write-conversion))) - -(define-coding-system-alias 'devanagari 'in-is13194) +(define-coding-system 'in-is13194-devanagari + "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)." + :coding-type 'iso-2022 + :mnemonic ?D + :designation [ascii indian-is13194 nil nil] + :charset-list '(ascii indian-is13194) + :post-read-conversion 'in-is13194-post-read-conversion + :pre-write-conversion 'in-is13194-pre-write-conversion) + +(define-coding-system-alias 'devanagari 'in-is13194-devanagari) (defvar indian-font-foundry 'cdac "Font foundry for Indian characters. 
@@ -156,18 +151,9 @@ Each Indian language environment sets this value to one of `indian-script-table' (which see). The default value is `devanagari'.") -(define-ccl-program ccl-encode-indian-glyph-font - `(0 - ;; Shorten (r1 = (((((r1 - 32) * 96) + r2) - 32) % 256)) - (r1 = ((((r1 * 96) + r2) - ,(+ (* 32 96) 32)) % 256)))) - -(setq font-ccl-encoder-alist - (cons (cons "-CDAC" 'ccl-encode-indian-glyph-font) - font-ccl-encoder-alist)) - -(setq font-ccl-encoder-alist - (cons (cons "-AKRUTI" 'ccl-encode-indian-glyph-font) - font-ccl-encoder-alist)) +(defvar indian-composable-pattern + (make-char-table nil) + "Char table of regexps for composable Indian character sequence.") (provide 'indian) diff --git a/lisp/language/japanese.el b/lisp/language/japanese.el index 64c74e02a0d..ea90ac19f56 100644 --- a/lisp/language/japanese.el +++ b/lisp/language/japanese.el @@ -1,7 +1,10 @@ ;;; japanese.el --- support for Japanese -*- coding: iso-2022-7bit; no-byte-compile: t -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. +;; Licensed to the Free Software Foundation. +;; Copyright (C) 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 ;; Keywords: multilingual, Japanese @@ -29,64 +32,77 @@ ;;; Code: -(make-coding-system - 'iso-2022-jp 2 ?J - "ISO 2022 based 7bit encoding for Japanese (MIME:ISO-2022-JP)." - '((ascii japanese-jisx0208-1978 japanese-jisx0208 - latin-jisx0201 japanese-jisx0212) nil nil nil - short ascii-eol ascii-cntl seven) - '((safe-charsets ascii japanese-jisx0208-1978 japanese-jisx0208 - latin-jisx0201 japanese-jisx0212) - (mime-charset . iso-2022-jp))) +(define-coding-system 'iso-2022-jp + "ISO 2022 based 7bit encoding for Japanese (MIME:ISO-2022-JP)." 
+ :coding-type 'iso-2022 + :mnemonic ?J + :designation [(ascii japanese-jisx0208-1978 japanese-jisx0208 + latin-jisx0201 japanese-jisx0212) + nil nil nil] + :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation) + :charset-list '(ascii japanese-jisx0208-1978 japanese-jisx0208 + latin-jisx0201 japanese-jisx0212) + :mime-charset 'iso-2022-jp) (define-coding-system-alias 'junet 'iso-2022-jp) -(make-coding-system - 'iso-2022-jp-2 2 ?J - "ISO 2022 based 7bit encoding for CJK, Latin-1, and Greek (MIME:ISO-2022-JP-2)." - '((ascii japanese-jisx0208-1978 japanese-jisx0208 - latin-jisx0201 japanese-jisx0212 - chinese-gb2312 korean-ksc5601) nil - (nil latin-iso8859-1 greek-iso8859-7) nil - short ascii-eol ascii-cntl seven nil single-shift nil nil nil init-bol) - '((safe-charsets ascii japanese-jisx0208-1978 japanese-jisx0208 - latin-jisx0201 japanese-jisx0212 - chinese-gb2312 korean-ksc5601 - latin-iso8859-1 greek-iso8859-7) - (mime-charset . iso-2022-jp-2))) - -(make-coding-system - 'japanese-shift-jis 1 ?S - "Shift-JIS 8-bit encoding for Japanese (MIME:SHIFT_JIS)." - nil - '((safe-charsets ascii japanese-jisx0208 japanese-jisx0208-1978 - latin-jisx0201 katakana-jisx0201) - (mime-charset . shift_jis) - (charset-origin-alist (japanese-jisx0208 "SJIS" encode-sjis-char) - (katakana-jisx0201 "SJIS" encode-sjis-char)))) +(define-coding-system 'iso-2022-jp-2 + "ISO 2022 based 7bit encoding for CJK, Latin-1, Greek (MIME:ISO-2022-JP-2)." 
+ :coding-type 'iso-2022 + :mnemonic ?J + :designation [(ascii japanese-jisx0208-1978 japanese-jisx0208 + latin-jisx0201 japanese-jisx0212 + chinese-gb2312 korean-ksc5601) + nil + (nil latin-iso8859-1 greek-iso8859-7) + nil] + :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation single-shift + init-at-bol) + :charset-list '(ascii japanese-jisx0208-1978 japanese-jisx0208 + latin-jisx0201 japanese-jisx0212 + chinese-gb2312 korean-ksc5601 + latin-iso8859-1 greek-iso8859-7) + :mime-charset 'iso-2022-jp-2) + +(define-coding-system 'japanese-shift-jis + "Shift-JIS 8-bit encoding for Japanese (MIME:SHIFT_JIS)" + :coding-type 'shift-jis + :mnemonic ?S + :charset-list '(ascii katakana-jisx0201 japanese-jisx0208) + :mime-charset 'shift_jis) (define-coding-system-alias 'shift_jis 'japanese-shift-jis) (define-coding-system-alias 'sjis 'japanese-shift-jis) -(make-coding-system - 'japanese-iso-7bit-1978-irv 2 ?j - "ISO 2022 based 7-bit encoding for Japanese JISX0208-1978 and JISX0201-Roman." - '((ascii japanese-jisx0208-1978 japanese-jisx0208 - latin-jisx0201 japanese-jisx0212 katakana-jisx0201 t) nil nil nil - short ascii-eol ascii-cntl seven nil nil use-roman use-oldjis) - '(ascii japanese-jisx0208-1978 japanese-jisx0208 latin-jisx0201)) +;; Fixme: AKA Shift-JIS according to +;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>. Is +;; that correct? + +(define-coding-system 'japanese-iso-7bit-1978-irv + "ISO 2022 based 7-bit encoding for Japanese JISX0208-1978 and JISX0201-Roman." 
+ :coding-type 'iso-2022 + :mnemonic ?j + :designation [(latin-jisx0201 japanese-jisx0208-1978 japanese-jisx0208 + japanese-jisx0212 katakana-jisx0201) + nil nil nil] + :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation + use-roman use-oldjis) + :charset-list '(ascii latin-jisx0201 japanese-jisx0208-1978 japanese-jisx0208 + japanese-jisx0212)) (define-coding-system-alias 'iso-2022-jp-1978-irv 'japanese-iso-7bit-1978-irv) (define-coding-system-alias 'old-jis 'japanese-iso-7bit-1978-irv) -(make-coding-system - 'japanese-iso-8bit 2 ?E - "ISO 2022 based EUC encoding for Japanese (MIME:EUC-JP)." - '(ascii japanese-jisx0208 katakana-jisx0201 japanese-jisx0212 - short ascii-eol ascii-cntl nil nil single-shift) - '((safe-charsets ascii latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978 - katakana-jisx0201 japanese-jisx0212) - (mime-charset . euc-jp))) +(define-coding-system 'japanese-iso-8bit + "ISO 2022 based EUC encoding for Japanese (MIME:EUC-JP)." + :coding-type 'iso-2022 + :mnemonic ?E + :designation [ascii japanese-jisx0208 katakana-jisx0201 japanese-jisx0212] + :flags '(short ascii-at-eol ascii-at-cntl single-shift) + :charset-list '(ascii latin-jisx0201 japanese-jisx0208 + japanese-jisx0208-1978 + katakana-jisx0201 japanese-jisx0212) + :mime-charset 'euc-jp) (define-coding-system-alias 'euc-japan-1990 'japanese-iso-8bit) (define-coding-system-alias 'euc-japan 'japanese-iso-8bit) diff --git a/lisp/language/korean.el b/lisp/language/korean.el index 4dbc2cb5b8a..f010de69898 100644 --- a/lisp/language/korean.el +++ b/lisp/language/korean.el @@ -1,7 +1,10 @@ ;;; korean.el --- support for Korean -*- coding: iso-2022-7bit; no-byte-compile: t -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. +;; Licensed to the Free Software Foundation. 
+;; Copyright (C) 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 ;; Keywords: multilingual, Korean @@ -28,25 +31,26 @@ ;;; Code: -(make-coding-system - 'korean-iso-8bit 2 ?K - "ISO 2022 based EUC encoding for Korean KSC5601 (MIME:EUC-KR)." - '(ascii korean-ksc5601 nil nil - nil ascii-eol ascii-cntl) - '((safe-charsets ascii korean-ksc5601) - (mime-charset . euc-kr))) +(define-coding-system 'korean-iso-8bit + "ISO 2022 based EUC encoding for Korean KSC5601 (MIME:EUC-KR)." + :coding-type 'iso-2022 + :mnemonic ?K + :designation [ascii korean-ksc5601 nil nil] + :charset-list '(ascii korean-ksc5601) + :mime-charset 'euc-kr) (define-coding-system-alias 'euc-kr 'korean-iso-8bit) (define-coding-system-alias 'euc-korea 'korean-iso-8bit) -(make-coding-system - 'iso-2022-kr 2 ?k - "ISO 2022 based 7-bit encoding for Korean KSC5601 (MIME:ISO-2022-KR)." - '(ascii (nil korean-ksc5601) nil nil - nil ascii-eol ascii-cntl seven locking-shift nil nil nil nil nil - designation-bol) - '((safe-charsets ascii korean-ksc5601) - (mime-charset . iso-2022-kr))) +(define-coding-system 'iso-2022-kr + "ISO 2022 based 7-bit encoding for Korean KSC5601 (MIME:ISO-2022-KR)." + :coding-type 'iso-2022 + :mnemonic ?k + :designation [ascii (nil korean-ksc5601) nil nil] + :flags '(ascii-at-eol ascii-at-cntl 7-bit designation locking-shift + designation-bol) + :charset-list '(ascii korean-ksc5601) + :mime-charset 'iso-2022-kr) (define-coding-system-alias 'korean-iso-7bit-lock 'iso-2022-kr) diff --git a/lisp/language/lao-util.el b/lisp/language/lao-util.el index 7105ae1beb6..4db213dab02 100644 --- a/lisp/language/lao-util.el +++ b/lisp/language/lao-util.el @@ -1,10 +1,13 @@ ;;; lao-util.el --- utilities for Lao -*- coding: iso-2022-7bit; -*- ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. +;; Licensed to the Free Software Foundation. 
;; Copyright (C) 2001 Free Software Foundation, Inc. +;; Copyright (C) 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 -;; Keywords: multilingual, Lao +;; Keywords: multilingual, Lao, i18n ;; This file is part of GNU Emacs. @@ -128,73 +131,7 @@ (?(1{(B invalid nil) (?(1|(B consonant "LETTER NHOR NHUU" "MOUSE") (?(1}(B consonant "LETTER MHOR MHAR" "DOG") - (?(1~(B invalid nil) - ;; Unicode equivalents - (?$,1D!(B consonant "LETTER KOR KAI'" "CHICKEN") - (?$,1D"(B consonant "LETTER KHOR KHAI'" "EGG") - (?$,1D$(B consonant "LETTER QHOR QHWARGN" "BUFFALO") - (?$,1D'(B consonant "LETTER NGOR NGUU" "SNAKE") - (?$,1D((B consonant "LETTER JOR JUA" "BUDDHIST NOVICE") - (?$,1D*(B consonant "LETTER XOR X\"ARNG" "ELEPHANT") - (?$,1D-(B consonant "LETTER YOR YUNG" "MOSQUITO") - (?$,1D4(B consonant "LETTER DOR DANG" "NOSE") - (?$,1D5(B consonant "LETTER TOR TAR" "EYE") - (?$,1D6(B consonant "LETTER THOR THUNG" "TO ASK,QUESTION") - (?$,1D7(B consonant "LETTER DHOR DHARM" "FLAG") - (?$,1D9(B consonant "LETTER NOR NOK" "BIRD") - (?$,1D:(B consonant "LETTER BOR BED" "FISHHOOK") - (?$,1D;(B consonant "LETTER POR PAR" "FISH") - (?$,1D<(B consonant "LETTER HPOR HPER\"" "BEE") - (?$,1D=(B consonant "LETTER FHOR FHAR" "WALL") - (?$,1D>(B consonant "LETTER PHOR PHUU" "MOUNTAIN") - (?$,1D?(B consonant "LETTER FOR FAI" "FIRE") - (?$,1DA(B consonant "LETTER MOR MAR\"" "HORSE") - (?$,1DB(B consonant "LETTER GNOR GNAR" "MEDICINE") - (?$,1DC(B consonant "LETTER ROR ROD" "CAR") - (?$,1DE(B consonant "LETTER LOR LIING" "MONKEY") - (?$,1DG(B consonant "LETTER WOR WII" "HAND FAN") - (?$,1DJ(B consonant "LETTER SOR SEA" "TIGER") - (?$,1DK(B consonant "LETTER HHOR HHAI" "JAR") - (?$,1DM(B consonant "LETTER OR OOW" "TAKE") - (?$,1DN(B consonant "LETTER HOR HEA" "BOAT") - (?$,1DO(B special "ELLIPSIS") - (?$,1DP(B vowel-base "VOWEL SIGN SARA A") - (?$,1DQ(B vowel-upper "VOWEL SIGN MAI KAN") - (?$,1DR(B vowel-base "VOWEL SIGN SARA AR") - 
(?$,1DS(B vowel-base "VOWEL SIGN SARA AM") - (?$,1DT(B vowel-upper "VOWEL SIGN SARA I") - (?$,1DU(B vowel-upper "VOWEL SIGN SARA II") - (?$,1DV(B vowel-upper "VOWEL SIGN SARA EU") - (?$,1DW(B vowel-upper "VOWEL SIGN SARA UR") - (?$,1DX(B vowel-lower "VOWEL SIGN SARA U") - (?$,1DY(B vowel-lower "VOWEL SIGN SARA UU") - (?$,1D[(B vowel-upper "VOWEL SIGN MAI KONG") - (?$,1D\(B semivowel-lower "SEMIVOWEL SIGN LO") - (?$,1D](B vowel-base "SEMIVOWEL SIGN SARA IA") - (?$,1D`(B vowel-base "VOWEL SIGN SARA EE") - (?$,1Da(B vowel-base "VOWEL SIGN SARA AA") - (?$,1Db(B vowel-base "VOWEL SIGN SARA OO") - (?$,1Dc(B vowel-base "VOWEL SIGN SARA EI MAI MUAN\"") - (?$,1Dd(B vowel-base "VOWEL SIGN SARA AI MAI MAY") - (?$,1Df(B special "KO LA (REPETITION)") - (?$,1Dh(B tone "TONE MAI EK") - (?$,1Di(B tone "TONE MAI THO") - (?$,1Dj(B tone "TONE MAI TI") - (?$,1Dk(B tone "TONE MAI JADTAWAR") - (?$,1Dl(B tone "CANCELLATION MARK") - (?$,1Dm(B vowel-upper "VOWEL SIGN SARA OR") - (?$,1Dp(B special "DIGIT ZERO") - (?$,1Dq(B special "DIGIT ONE") - (?$,1Dr(B special "DIGIT TWO") - (?$,1Ds(B special "DIGIT THREE") - (?$,1Dt(B special "DIGIT FOUR") - (?$,1Du(B special "DIGIT FIVE") - (?$,1Dv(B special "DIGIT SIX") - (?$,1Dw(B special "DIGIT SEVEN") - (?$,1Dx(B special "DIGIT EIGHT") - (?$,1Dy(B special "DIGIT NINE") - (?$,1D|(B consonant "LETTER NHOR NHUU" "MOUSE") - (?$,1D}(B consonant "LETTER MHOR MHAR" "DOG"))) + (?(1~(B invalid nil))) elm) (while l (setq elm (car l) l (cdr l)) @@ -219,10 +156,10 @@ ;; CV -> C, CT -> C, CVT -> C, Cv -> C, CvT -> C ;; v v ;; T -;; V T V T -;; CsV -> C, CsT -> C, CsVT -> C, Csv -> C, CvT -> C -;; s s s s s -;; v v +;; V T V T +;; CsV -> C, CsT -> C, CsVT -> C, Csv -> C, CsvT -> C +;; s s s s s +;; v v ;; where C: consonant, V: vowel upper, v: vowel lower, @@ -422,37 +359,7 @@ ("(1d(B" (?(1d(B 0)) ("(1c(B" (?(1c(B 0)) ("(1`[R(B" (?(1`(B 0 ?(1[(B ?(1R(B)) - ("(1S(B" (0 ?(1S(B)) - - ;; Unicode equivalents - ("$,1DP(B" (0 ?$,1DP(B) (0 ?$,1DQ(B)) - ("$,1DR(B" (0 
?$,1DR(B)) - ("$,1DT(B" (0 ?$,1DU(B)) - ("$,1DU(B" (0 ?$,1DU(B)) - ("$,1DV(B" (0 ?$,1DV(B)) - ("$,1DW(B" (0 ?$,1DW(B)) - ("$,1DX(B" (0 ?$,1DX(B)) - ("$,1DY(B" (0 ?$,1DY(B)) - ("$,1D`DP(B" (?$,1D`(B 0 ?$,1DP(B) (?$,1D`(B 0 ?$,1DQ(B)) - ("$,1D`(B" (?$,1D`(B 0)) - ("$,1DaDP(B" (?$,1Da(B 0 ?$,1DP(B) (?$,1Da(B 0 ?$,1DQ(B)) - ("$,1Da(B" (?$,1Da(B 0)) - ("$,1DbDP(B" (?$,1Db(B 0 ?$,1DP(B) (0 ?$,1D[(B) (?$,1D-(B ?$,1Db(B 0 ?$,1DQ(B) (?$,1DG(B ?$,1Db(B 0 ?$,1DQ(B)) - ("$,1Db(B" (?$,1Db(B 0)) - ("$,1D`DRDP(B" (?$,1D`(B 0 ?$,1DR(B ?$,1DP(B) (0 ?$,1DQ(B ?$,1DM(B)) - ("$,1Dm(B" (0 ?$,1Dm(B) (0 ?$,1DM(B)) - ("$,1D`DT(B" (?$,1D`(B 0 ?$,1DT(B)) - ("$,1D`DU(B" (?$,1D`(B 0 ?$,1DU(B)) - ("$,1D[DGDP(B" (0 ?$,1D[(B ?$,1DG(B ?$,1DP(B) (0 ?$,1DQ(B ?$,1DG(B)) - ("$,1D[DG(B" (0 ?$,1D[(B ?$,1DG(B) (0 ?$,1DG(B)) - ("$,1D`DQD]DP(B" (?$,1D`(B 0 ?$,1DQ(B ?$,1D](B ?$,1DP(B) (0 ?$,1DQ(B ?$,1D](B)) - ("$,1D`DQD](B" (?$,1D`(B 0 ?$,1DQ(B ?$,1D](B) (0 ?$,1D](B)) - ("$,1D`DVDM(B" (?$,1D`(B 0 ?$,1DV(B ?$,1DM(B)) - ("$,1D`DWDM(B" (?$,1D`(B 0 ?$,1DW(B ?$,1DM(B)) - ("$,1Dd(B" (?$,1Dd(B 0)) - ("$,1Dc(B" (?$,1Dc(B 0)) - ("$,1D`D[DR(B" (?$,1D`(B 0 ?$,1D[(B ?$,1DR(B)) - ("$,1DS(B" (0 ?$,1DS(B))) + ("(1S(B" (0 ?(1S(B))) "Alist of Lao vowel string vs the corresponding re-ordering rule. Each element has this form: (VOWEL NO-MAA-SAKOD-RULE WITH-MAA-SAKOD-RULE (MAA-SAKOD-0 RULE-0) ...) @@ -583,24 +490,20 @@ syllable. In that case, FROM and TO are indexes to STR." lao-str))) ;;;###autoload -(defun lao-post-read-conversion (len) - (lao-compose-region (point) (+ (point) len)) - len) - -;;;###autoload -(defun lao-composition-function (from to pattern &optional string) - "Compose Lao text in the region FROM and TO. -The text matches the regular expression PATTERN. -Optional 4th argument STRING, if non-nil, is a string containing text -to compose. - -The return value is number of composed characters." 
- (if (< (1+ from) to) - (progn - (if string - (compose-string string from to) - (compose-region from to)) - (- to from)))) +(defun lao-composition-function (pos &optional string) + (setq pos (1- pos)) + (with-category-table lao-category-table + (if string + (if (and (>= pos 0) + (eq (string-match lao-composition-pattern string pos) pos)) + (prog1 (match-end 0) + (compose-string string pos (match-end 0)))) + (if (>= pos (point-min)) + (save-excursion + (goto-char pos) + (if (looking-at lao-composition-pattern) + (prog1 (match-end 0) + (compose-region pos (match-end 0))))))))) ;;;###autoload (defun lao-compose-region (from to) diff --git a/lisp/language/lao.el b/lisp/language/lao.el index 72e90930abb..8edc282a46c 100644 --- a/lisp/language/lao.el +++ b/lisp/language/lao.el @@ -1,8 +1,11 @@ ;;; lao.el --- support for Lao -*- coding: iso-2022-7bit; no-byte-compile: t -*- ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. +;; Licensed to the Free Software Foundation. ;; Copyright (C) 2001 Free Software Foundation, Inc. +;; Copyright (C) 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 ;; Keywords: multilingual, Lao @@ -27,41 +30,26 @@ ;;; Code: -(make-coding-system - 'lao 2 ?L - "8-bit encoding for ASCII (MSB=0) and LAO (MSB=1)." - '(ascii lao nil nil - nil nil nil nil nil nil nil nil nil nil nil t) - '((safe-charsets ascii lao) - (post-read-conversion . lao-post-read-conversion))) +(define-coding-system 'lao + "8-bit encoding for ASCII (MSB=0) and LAO (MSB=1)." + :coding-type 'charset + :mnemonic ?L + :charset-list '(lao)) (set-language-info-alist "Lao" '((charset lao) (coding-system lao) (coding-priority lao) (input-method . "lao") - (nonascii-translation . lao) (unibyte-display . lao) (features lao-util) (documentation . 
t))) -(aset use-default-ascent ?(1;(B t) -(aset use-default-ascent ?$,1D;(B t) -(aset use-default-ascent ?(1=(B t) -(aset use-default-ascent ?$,1D=(B t) -(aset use-default-ascent ?(1?(B t) -(aset use-default-ascent ?$,1D?(B t) -(aset use-default-ascent ?(1B(B t) -(aset use-default-ascent ?$,1DB(B t) -(aset ignore-relative-composition ?(1\(B t) -(aset ignore-relative-composition ?$,1D\(B t) - -;; Register a function to compose Lao characters. -(let ((patterns '(("\\c0\\c9?\\(\\(\\c2\\|\\c3\\)\\c4?\\|\\c4\\)?" - . lao-composition-function)))) - (aset composition-function-table (make-char 'lao) patterns) - (dotimes (i (1+ (- #xeff #xe80))) - (aset composition-function-table (decode-char 'ucs (+ i #xe80)) patterns))) +;; For automatic composition. +(let ((chars "(1QTUVWXY[\hijklm(B")) + (dotimes (i (length chars)) + (aset composition-function-table (aref chars i) + 'lao-composition-function))) (provide 'lao) diff --git a/lisp/language/malayalam.el b/lisp/language/malayalam.el index 3a7c19892c7..27bf122fd7c 100644 --- a/lisp/language/malayalam.el +++ b/lisp/language/malayalam.el @@ -41,6 +41,11 @@ South Indian language Malayalam is supported in this language environment.")) '("Indian")) +;; For automatic composition. +(set-char-table-range composition-function-table '(#x0d00 . #x0d7f) + 'malayalam-composition-function) + + (provide 'malayalam) ;;; malayalam.el ends here diff --git a/lisp/language/mlm-util.el b/lisp/language/mlm-util.el index a01f3c4a88f..b492d269ff2 100644 --- a/lisp/language/mlm-util.el +++ b/lisp/language/mlm-util.el @@ -100,21 +100,17 @@ ;;;###autoload -(defun malayalam-composition-function (from to pattern &optional string) - "Compose Malayalam characters in REGION, or STRING if specified. -Assume that the REGION or STRING must fully match the composable -PATTERN regexp." - (if string (malayalam-compose-syllable-string string) - (malayalam-compose-syllable-region from to)) - (- to from)) - -;; Register a function to compose Malayalam characters. 
-(mapc - (function (lambda (ucs) - (aset composition-function-table (decode-char 'ucs ucs) - (list (cons malayalam-composable-pattern - 'malayalam-composition-function))))) - (nconc '(#x0d02 #x0d03) (malayalam-range #x0d05 #x0d39))) +(defun malayalam-composition-function (pos &optional string) + "Compose Malayalam characters after the position POS. +If STRING is not nil, it is a string, and POS is an index to the string. +In this case, compose characters after POS of the string." + (if string + ;; Not yet implemented. + nil + (goto-char pos) + (if (looking-at malayalam-composable-pattern) + (prog1 (match-end 0) + (malayalam-compose-syllable-region pos (match-end 0)))))) ;; Notes on conversion steps. @@ -379,10 +375,16 @@ PATTERN regexp." (narrow-to-region from to) (goto-char (point-min)) ;; char-glyph-conversion - (while (re-search-forward mlm-char-glyph-regexp nil t) - (setq match-str (match-string 0)) - (setq glyph-str - (concat glyph-str (gethash match-str mlm-char-glyph-hash)))) + (while (not (eobp)) + (if (looking-at mlm-char-glyph-regexp) + (progn + (setq match-str (match-string 0) + glyph-str + (concat glyph-str + (gethash match-str mlm-char-glyph-hash))) + (goto-char (match-end 0))) + (setq glyph-str (concat glyph-str (string (following-char)))) + (forward-char 1))) (when (string-match mlm-glyph-reorder-key-glyphs glyph-str) ;; glyph reordering (setq glyph-reorder-regexps mlm-glyph-reordering-regexp-list) diff --git a/lisp/language/romanian.el b/lisp/language/romanian.el index 2c4d2de94ab..a89a9ab777c 100644 --- a/lisp/language/romanian.el +++ b/lisp/language/romanian.el @@ -1,9 +1,9 @@ ;;; romanian.el --- support for Romanian -*- coding: iso-latin-2; no-byte-compile: t -*- -;; Copyright (C) 1998 Free Software Foundation. +;; Copyright (C) 1998, 2002 Free Software Foundation. ;; Author: Dan Nicolaescu <done@ece.arizona.edu> -;; Keywords: multilingual, Romanian +;; Keywords: multilingual, Romanian, i18n ;; This file is part of GNU Emacs. 
@@ -24,23 +24,33 @@ ;;; Commentary: -;; Romanian ISO 8859-2 environment. +;; Romanian ISO 8859-2 environment plus 8859-16 coding system. ;;; Code: (set-language-info-alist - "Romanian" '((charset . (ascii latin-iso8859-2)) - (coding-system . (iso-8859-2)) - (coding-priority . (iso-8859-2)) - (nonascii-translation . latin-iso8859-2) + "Romanian" '((charset iso-8859-2) + (coding-system iso-8859-2 iso-latin-10) + (coding-priority iso-8859-2) + (nonascii-translation . iso-8859-2) (input-method . "latin-2-postfix") - (unibyte-syntax . "latin-2") (unibyte-display . iso-8859-2) (tutorial . "TUTORIAL.ro") (sample-text . "Bună ziua, bine ați venit!") - (documentation . t)) + (documentation . "Rmoanian environment using Latin-2 encoding. +An environment for generic Latin-10 encoding is also available.")) '("European")) +(define-coding-system 'iso-latin-10 + "ISO 2022 based 8-bit encoding for Latin-10." + :coding-type 'charset + :mnemonic ?* + :charset-list '(iso-8859-16) + :mime-charset 'iso-8859-16) + +(define-coding-system-alias 'iso-8859-16 'iso-latin-10) +(define-coding-system-alias 'latin-10 'iso-latin-10) + (provide 'romanian) ;;; romanian.el ends here diff --git a/lisp/language/slovak.el b/lisp/language/slovak.el index a03c84d7c3f..42983b72628 100644 --- a/lisp/language/slovak.el +++ b/lisp/language/slovak.el @@ -34,9 +34,8 @@ "Slovak" '((charset . (ascii latin-iso8859-2)) (coding-system . (iso-8859-2)) (coding-priority . (iso-8859-2)) - (nonascii-translation . latin-iso8859-2) + (nonascii-translation . iso-8859-2) (input-method . "slovak") - (unibyte-syntax . "latin-2") (unibyte-display . iso-8859-2) (tutorial . "TUTORIAL.sk") (sample-text . 
"Prajeme V,Ba(Bm pr,Bm(Bjemn,B}(B de,Br(B!") diff --git a/lisp/language/tamil.el b/lisp/language/tamil.el index be02f07376c..04f3eacc5e5 100644 --- a/lisp/language/tamil.el +++ b/lisp/language/tamil.el @@ -38,6 +38,9 @@ South Indian Language Tamil supported in this language environment.")) '("Indian")) +;; For automatic composition. +(set-char-table-range composition-function-table '(#x0b80 . #x0bff) + 'tamil-composition-function) (provide 'tamil) ;;; tamil.el ends here diff --git a/lisp/language/thai-util.el b/lisp/language/thai-util.el index 58588f974d7..251c1fee5bc 100644 --- a/lisp/language/thai-util.el +++ b/lisp/language/thai-util.el @@ -1,9 +1,12 @@ ;;; thai-util.el --- utilities for Thai -*- coding: iso-2022-7bit; -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. +;; Licensed to the Free Software Foundation. +;; Copyright (C) 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 -;; Keywords: mule, multilingual, thai +;; Keywords: mule, multilingual, Thai, i18n ;; This file is part of GNU Emacs. 
@@ -139,95 +142,6 @@ (?,T|(B invalid nil) ; 0xFC (?,T}(B invalid nil) ; 0xFD (?,T~(B invalid nil) ; 0xFE - - ;; Unicode equivalents - (?$,1Ba(B consonant "LETTER KO KAI") - (?$,1Bb(B consonant "LETTER KHO KHAI") - (?$,1Bc(B consonant "LETTER KHO KHUAT") - (?$,1Bd(B consonant "LETTER KHO KHWAI") - (?$,1Be(B consonant "LETTER KHO KHON") - (?$,1Bf(B consonant "LETTER KHO RAKHANG") - (?$,1Bg(B consonant "LETTER NGO NGU") - (?$,1Bh(B consonant "LETTER CHO CHAN") - (?$,1Bi(B consonant "LETTER CHO CHING") - (?$,1Bj(B consonant "LETTER CHO CHANG") - (?$,1Bk(B consonant "LETTER SO SO") - (?$,1Bl(B consonant "LETTER CHO CHOE") - (?$,1Bm(B consonant "LETTER YO YING") - (?$,1Bn(B consonant "LETTER DO CHADA") - (?$,1Bo(B consonant "LETTER TO PATAK") - (?$,1Bp(B consonant "LETTER THO THAN") - (?$,1Bq(B consonant "LETTER THO NANGMONTHO") - (?$,1Br(B consonant "LETTER THO PHUTHAO") - (?$,1Bs(B consonant "LETTER NO NEN") - (?$,1Bt(B consonant "LETTER DO DEK") - (?$,1Bu(B consonant "LETTER TO TAO") - (?$,1Bv(B consonant "LETTER THO THUNG") - (?$,1Bw(B consonant "LETTER THO THAHAN") - (?$,1Bx(B consonant "LETTER THO THONG") - (?$,1By(B consonant "LETTER NO NU") - (?$,1Bz(B consonant "LETTER BO BAIMAI") - (?$,1B{(B consonant "LETTER PO PLA") - (?$,1B|(B consonant "LETTER PHO PHUNG") - (?$,1B}(B consonant "LETTER FO FA") - (?$,1B~(B consonant "LETTER PHO PHAN") - (?$,1B(B consonant "LETTER FO FAN") - (?$,1C (B consonant "LETTER PHO SAMPHAO") - (?$,1C!(B consonant "LETTER MO MA") - (?$,1C"(B consonant "LETTER YO YAK") - (?$,1C#(B consonant "LETTER RO RUA") - (?$,1C$(B vowel-base "LETTER RU (Pali vowel letter)") - (?$,1C%(B consonant "LETTER LO LING") - (?$,1C&(B vowel-base "LETTER LU (Pali vowel letter)") - (?$,1C'(B consonant "LETTER WO WAEN") - (?$,1C((B consonant "LETTER SO SALA") - (?$,1C)(B consonant "LETTER SO RUSI") - (?$,1C*(B consonant "LETTER SO SUA") - (?$,1C+(B consonant "LETTER HO HIP") - (?$,1C,(B consonant "LETTER LO CHULA") - (?$,1C-(B consonant "LETTER O ANG") - 
(?$,1C.(B consonant "LETTER HO NOK HUK") - (?$,1C/(B special "PAI YAN NOI (abbreviation)") - (?$,1C0(B vowel-base "VOWEL SIGN SARA A") - (?$,1C1(B vowel-upper "VOWEL SIGN MAI HAN-AKAT N/S-T") - (?$,1C2(B vowel-base "VOWEL SIGN SARA AA") - (?$,1C3(B vowel-base "VOWEL SIGN SARA AM") - (?$,1C4(B vowel-upper "VOWEL SIGN SARA I N/S-T") - (?$,1C5(B vowel-upper "VOWEL SIGN SARA II N/S-T") - (?$,1C6(B vowel-upper "VOWEL SIGN SARA UE N/S-T") - (?$,1C7(B vowel-upper "VOWEL SIGN SARA UEE N/S-T") - (?$,1C8(B vowel-lower "VOWEL SIGN SARA U N/S-B") - (?$,1C9(B vowel-lower "VOWEL SIGN SARA UU N/S-B") - (?$,1C:(B vowel-lower "VOWEL SIGN PHINTHU N/S-B (Pali virama)") - (?$,1C?(B special "BAHT SIGN (currency symbol)") - (?$,1C@(B vowel-base "VOWEL SIGN SARA E") - (?$,1CA(B vowel-base "VOWEL SIGN SARA AE") - (?$,1CB(B vowel-base "VOWEL SIGN SARA O") - (?$,1CC(B vowel-base "VOWEL SIGN SARA MAI MUAN") - (?$,1CD(B vowel-base "VOWEL SIGN SARA MAI MALAI") - (?$,1CE(B vowel-base "LAK KHANG YAO") - (?$,1CF(B special "MAI YAMOK (repetion)") - (?$,1CG(B vowel-upper "VOWEL SIGN MAI TAI KHU N/S-T") - (?$,1CH(B tone "TONE MAI EK N/S-T") - (?$,1CI(B tone "TONE MAI THO N/S-T") - (?$,1CJ(B tone "TONE MAI TRI N/S-T") - (?$,1CK(B tone "TONE MAI CHATTAWA N/S-T") - (?$,1CL(B tone "THANTHAKHAT N/S-T (cancellation mark)") - (?$,1CM(B tone "NIKKHAHIT N/S-T (final nasal)") - (?$,1CN(B vowel-upper "YAMAKKAN N/S-T") - (?$,1CO(B special "FONRMAN") - (?$,1CP(B special "DIGIT ZERO") - (?$,1CQ(B special "DIGIT ONE") - (?$,1CR(B special "DIGIT TWO") - (?$,1CS(B special "DIGIT THREE") - (?$,1CT(B special "DIGIT FOUR") - (?$,1CU(B special "DIGIT FIVE") - (?$,1CV(B special "DIGIT SIX") - (?$,1CW(B special "DIGIT SEVEN") - (?$,1CX(B special "DIGIT EIGHT") - (?$,1CY(B special "DIGIT NINE") - (?$,1CZ(B special "ANGKHANKHU (ellipsis)") - (?$,1C[(B special "KHOMUT (beginning of religious texts)") )) elm) (while l @@ -273,24 +187,20 @@ positions (integers or markers) specifying the region." 
(thai-compose-region (point-min) (point-max))) ;;;###autoload -(defun thai-post-read-conversion (len) - (thai-compose-region (point) (+ (point) len)) - len) - -;;;###autoload -(defun thai-composition-function (from to pattern &optional string) - "Compose Thai text in the region FROM and TO. -The text matches the regular expression PATTERN. -Optional 4th argument STRING, if non-nil, is a string containing text -to compose. - -The return value is number of composed characters." - (if (< (1+ from) to) - (progn - (if string - (compose-string string from to) - (compose-region from to)) - (- to from)))) +(defun thai-composition-function (pos &optional string) + (setq pos (1- pos)) + (let ((pattern "[,T!(B-,TCEG(B-,TN!(B-,TCEG(B-,TN(B][,TQT(B-,TWgnX(B-,TZQT(B-,TWgnX(B-,TZ(B]?[,Th(B-,Tmh(B-,Tm(B]?")) + (if string + (if (and (>= pos 0) + (eq (string-match pattern string pos) pos)) + (prog1 (match-end 0) + (compose-string string pos (match-end 0)))) + (if (>= pos (point-min)) + (progn + (goto-char pos) + (if (looking-at pattern) + (prog1 (match-end 0) + (compose-region pos (match-end 0))))))))) ;; (provide 'thai-util) diff --git a/lisp/language/thai.el b/lisp/language/thai.el index f822e93bd1b..0723c3d182b 100644 --- a/lisp/language/thai.el +++ b/lisp/language/thai.el @@ -1,9 +1,13 @@ ;;; thai.el --- support for Thai -*- coding: iso-2022-7bit; no-byte-compile: t -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. +;; Licensed to the Free Software Foundation. +;; Copyright (C) 2002 Free Software Foundation, Inc. +;; Copyright (C) 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 -;; Keywords: multilingual, Thai +;; Keywords: multilingual, Thai, i18n ;; This file is part of GNU Emacs. @@ -28,14 +32,11 @@ ;;; Code: -(make-coding-system - 'thai-tis620 2 ?T - "8-bit encoding for ASCII (MSB=0) and Thai TIS620 (MSB=1)." 
- '(ascii thai-tis620 nil nil - nil nil nil nil nil nil nil nil nil nil nil t) - '((safe-charsets ascii thai-tis620) - (mime-charset . tis-620) - (post-read-conversion . thai-post-read-conversion))) +(define-coding-system 'thai-tis620 + "8-bit encoding for ASCII (MSB=0) and Thai TIS620 (MSB=1)." + :coding-type 'charset + :mnemonic ?T + :charset-list '(tis620-2533)) (define-coding-system-alias 'th-tis620 'thai-tis620) (define-coding-system-alias 'tis620 'thai-tis620) @@ -44,9 +45,9 @@ (set-language-info-alist "Thai" '((tutorial . "TUTORIAL.th") (charset thai-tis620) - (coding-system thai-tis620) + (coding-system thai-tis620 iso-8859-11 cp874) (coding-priority thai-tis620) - (nonascii-translation . thai-tis620) + (nonascii-translation . tis620-2533) (input-method . "thai-kesmanee") (unibyte-display . thai-tis620) (features thai-util) @@ -55,13 +56,27 @@ (copy-sequence "Thai (,T@RIRd7B(B) ,TJ0GQ1J04U1$0CQ1:(B, ,TJ0GQ1J04U10$h1P(B"))) (documentation . t))) - -;; Register a function to compose Thai characters. -(let ((patterns '(("\\c0\\c4\\|\\c0\\(\\c2\\|\\c3\\)\\c4?" - . thai-composition-function)))) - (aset composition-function-table (make-char 'thai-tis620) patterns) - (dotimes (i (1+ (- #xe7f #xe00))) - (aset composition-function-table (decode-char 'ucs (+ i #xe00)) patterns))) +(define-coding-system 'cp874 + "DOS codepage 874 (Thai)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp874) + :mime-charset 'cp874) +(define-coding-system-alias 'ibm874 'cp874) + +(define-coding-system 'iso-8859-11 + "ISO/IEC 8859/11 (Latin/Thai) +This is the same as `thai-tis620' with the addition of no-break-space." + :coding-type 'charset + :mnemonic ?* + :mime-charset 'iso-8859-11 ; not actually registered as of 2002-05-24 + :charset-list '(iso-8859-11)) + +;; For automatic composition. 
+(let ((chars ",TQTUVWXYZghijklmn(B")) + (dotimes (i (length chars)) + (aset composition-function-table (aref chars i) + 'thai-composition-function))) (provide 'thai) diff --git a/lisp/language/tibet-util.el b/lisp/language/tibet-util.el index 2336b988fd0..260cf7efe54 100644 --- a/lisp/language/tibet-util.el +++ b/lisp/language/tibet-util.el @@ -36,13 +36,13 @@ ;;; Code: (defconst tibetan-obsolete-glyphs - `(("$(7!=(B" . "$(8!=(B") ; 2 col <-> 1 col - ("$(7!?(B" . "$(8!?(B") - ("$(7!@(B" . "$(8!@(B") - ("$(7!A(B" . "$(8!A(B") - ("$(7"`(B" . "$(8"`(B") - ("$(7!;(B" . "$(8!;(B") - ("$(7!D(B" . "$(8!D(B") + `(("$(7!=(B" . "$(7!=(B") ; 2 col <-> 1 col + ("$(7!?(B" . "$(7!?(B") + ("$(7!@(B" . "$(7!@(B") + ("$(7!A(B" . "$(7!A(B") + ("$(7"`(B" . "$(7"`(B") + ("$(7!;(B" . "$(7!;(B") + ("$(7!D(B" . "$(7!D(B") ;; Yes these are dirty. But ... ("$(7!>(B $(7!>(B" . ,(compose-string "$(7!>(B $(7!>(B" 0 3 [?$(7!>(B (Br . Bl) ? (Br . Bl) ?$(7!>(B])) ("$(7!4!5!5(B" . ,(compose-string @@ -137,7 +137,7 @@ The returned string has no composition information." ;;; ;;; Here are examples of the words "bsgrubs" and "hfauM" ;;; -;;; 4$(7"70"714%qx!"U0"G###C"U14"70"714"G0"G1(B 4$(7"Hx!"Rx!"Ur'"_0"H"R"U"_1(B +;;; $(7"7"G###C"U"7"G(B $(7"H"R"U"_(B ;;; ;;; M ;;; b s b s h @@ -163,7 +163,7 @@ The returned string has no composition information." ;; If 'a follows a consonant, turn it into the subjoined form. ;; * Disabled by Tomabechi 2000/06/09 * ;; Because in Unicode, $(7"A(B may follow directly a consonant without - ;; any intervening vowel, as in 4$(7"90"914""0"""Q14"A0"A1!;(B=4$(7"90"91(B 4$(7""0""1(B 4$(7"A0"A1(B not 4$(7"90"91(B 4$(7""0""1(B $(7"Q(B 4$(7"A0"A1(B + ;; any intervening vowel, as in $(7"9"""Q"A!;(B=$(7"9(B $(7""(B $(7"A(B not $(7"9(B $(7""(B $(7"Q(B $(7"A(B ;;(if (and (= char ?$(7"A(B) ;; (aref (char-category-set (car last)) ?0)) ;; (setq char ?$(7"R(B)) ;; modified for new font by Tomabechi 1999/12/10 @@ -185,7 +185,8 @@ The returned string has no composition information." 
;; Compose lower vowel sign vertically under. ((aref (char-category-set char) ?3) - (if (eq char ?$(7"Q(B) ;; `$(7"Q(B' should not visible when composed. + (if (or (eq char ?$(7"Q(B) ;; `$(7"Q(B' and `$,1FP(B' should not visible when composed. + (eq char #xF70)) (setq rule nil) (setq rule stack-under))) ;; Transform ra-mgo (superscribed r) if followed by a subjoined @@ -311,11 +312,17 @@ are decomposed into normal Tibetan character sequences." new)) ;;;###autoload -(defun tibetan-composition-function (from to pattern &optional string) +(defun tibetan-composition-function (pos &optional string) + (setq pos (1- pos)) (if string - (tibetan-compose-string string) - (tibetan-compose-region from to)) - (- to from)) + ;; Not yet implemented. + nil + (if (>= pos (point-min)) + (save-excursion + (goto-char pos) + (if (looking-at tibetan-composable-pattern) + (prog1 (match-end 0) + (tibetan-compose-region pos (match-end 0)))))))) ;;; ;;; This variable is used to avoid repeated decomposition. diff --git a/lisp/language/tibetan.el b/lisp/language/tibetan.el index 3ca1da9d839..ab9516f73d1 100644 --- a/lisp/language/tibetan.el +++ b/lisp/language/tibetan.el @@ -1,9 +1,12 @@ ;;; tibetan.el --- support for Tibetan language -*- coding: iso-2022-7bit; -*- ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. +;; Licensed to the Free Software Foundation. +;; Copyright (C) 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 -;; Keywords: multilingual, Tibetan +;; Keywords: multilingual, Tibetan, i18n ;; This file is part of GNU Emacs. @@ -82,14 +85,12 @@ ;;; -(make-coding-system - 'tibetan-iso-8bit 2 ?Q - "8-bit encoding for ASCII (MSB=0) and TIBETAN (MSB=1)." - '(ascii tibetan nil nil - nil nil) - '((safe-charsets ascii tibetan) - (post-read-conversion . tibetan-post-read-conversion) - (pre-write-conversion . 
tibetan-pre-write-conversion))) +(define-coding-system 'tibetan-iso-8bit + "8-bit encoding for ASCII (MSB=0) and TIBETAN (MSB=1)." + :coding-type 'iso-2022 + :mnemonic ?Q + :designation [ascii tibetan nil nil] + :charset-list '(ascii tibetan)) (define-coding-system-alias 'tibetan 'tibetan-iso-8bit) @@ -100,11 +101,7 @@ (input-method . "tibetan-wylie") (features tibet-util) (documentation . t) - (sample-text - . (tibetan-compose-string - (copy-sequence -"Tibetan (4$(7"7r'"]0"7"]14"20"21!;4%P0"G#!"Q14"20"21!;(B) $(7!4!5!5!>4"70"714$P0"!#C"Q1!;4"Er'"S0"E"S14"G0"G1!;4"70"714"2r'"[0"2"[1!;4"Dr'"[0"D"[14"#0"#14"G0"G1!>4"Ir'"]r'"_0"I"]"_1!;4"90"9"Q1!;4"/r'"S0"/"S1!;4"50"5"Q14#2x!#9r'"[0"2#9"[1!;4"Hx!"Rx!"Ur'"c0"H"A"U"c1!>(B"))))) - + (sample-text "Tibetan ($(7"7"]"2!;"G#!"Q"2!;(B) $(7!4!5!5!>"7"!#C"Q!;"E"S"G!;"7"2"[!;"D"["#"G!>"I"]"_!;"9"Q!;"/"S!;"5"Q"2#9"[!;"H"A"U"c!>(B"))) ;; `$(7"A(B' is included in the pattern for subjoined consonants because we ;; treat it specially in tibetan-add-components. @@ -115,13 +112,9 @@ ;; $(7"A(B is removed from the class of subjoined. Tomabechi 2000/06/08 ;; (for Unicode support) (defconst tibetan-composable-pattern - "[$(7"!(B-$(7"J"K(B][$(7#!(B-$(7#J#K#L#M(B]*[$(7"Q"R"S(B-$(7"^"a"b"e(B]*[$(7"_"c"d"g(B-$(7"l!I!e!g(B]*" + "[$(7"!(B-$(7"J"K(B][$(7#!(B-$(7#J#K#L#M(B]*[$,1FP$(7"Q"R"S(B-$(7"^"a"b"e(B]*[$(7"_"c"d"g(B-$(7"l!I!e!g(B]*" "Regexp matching a composable sequence of Tibetan characters.") -;; Register a function to compose Tibetan characters. -(aset composition-function-table (make-char 'tibetan) - (list (cons tibetan-composable-pattern 'tibetan-composition-function))) - ;;; ;;; Definitions of conversion data. ;;; @@ -608,6 +601,16 @@ This also matches some punctuation characters which need conversion.") (defvar tibetan-decomposed nil) (defvar tibetan-decomposed-temp nil) + +;; For automatic composition. +(dolist (range '((?$(7#!(B . ?$(7#J(B) "$(7#K#L#M"Q"R(B" (?$(7"S(B . ?$(7"^(B) "$(7"a"b"e"_"c"d(B" (?$(7"g(B . 
?$(7"l(B) "$(7!I!e!g(B")) + (if (stringp range) + (dotimes (i (length range)) + (aset composition-function-table (aref range i) + 'tibetan-composition-function)) + (set-char-table-range composition-function-table range + 'tibetan-composition-function))) + (provide 'tibetan) ;;; tibetan.el ends here diff --git a/lisp/language/tml-util.el b/lisp/language/tml-util.el index bb8c8f19e04..34c18741e97 100644 --- a/lisp/language/tml-util.el +++ b/lisp/language/tml-util.el @@ -101,23 +101,6 @@ (function (lambda (x y) (> (length x) (length y)))))))) -;;;###autoload -(defun tamil-composition-function (from to pattern &optional string) - "Compose Tamil characters in REGION, or STRING if specified. -Assume that the REGION or STRING must fully match the composable -PATTERN regexp." - (if string (tamil-compose-syllable-string string) - (tamil-compose-syllable-region from to)) - (- to from)) - -;; Register a function to compose Tamil characters. -(mapc - (function (lambda (ucs) - (aset composition-function-table (decode-char 'ucs ucs) - (list (cons tamil-composable-pattern - 'tamil-composition-function))))) - (nconc '(#x0b82 #x0b83) (tamil-range #x0b85 #x0bb9))) - ;; Notes on conversion steps. ;; 1. chars to glyphs @@ -135,7 +118,7 @@ PATTERN regexp." (defvar tml-char-glyph '(;; various signs - ;;("$,1<"(B" . "") + ("$,1<"(B" . "$,4)b(B") ;; not good ("$,1<#(B" . "$,4*G(B") ;; Independent Vowels ("$,1<%(B" . "$,4*<(B") @@ -334,10 +317,18 @@ PATTERN regexp." 
(narrow-to-region from to) (goto-char (point-min)) ;; char-glyph-conversion - (while (re-search-forward tml-char-glyph-regexp nil t) - (setq match-str (match-string 0)) - (setq glyph-str - (concat glyph-str (gethash match-str tml-char-glyph-hash)))) + (while (not (eobp)) + (if (looking-at tml-char-glyph-regexp) + (progn + (setq match-str (match-string 0) + glyph-str + (concat glyph-str + (gethash match-str tml-char-glyph-hash))) + (goto-char (match-end 0))) + (setq glyph-str (concat glyph-str (string (following-char)))) + (forward-char 1))) + (or glyph-str + (aset glyph-str 0 (following-char))) ;; glyph reordering (when (string-match tml-glyph-reorder-key-glyphs glyph-str) (if (string-match (car tml-glyph-reordering-regexp-list) @@ -362,6 +353,19 @@ PATTERN regexp." glyph-str)))) (compose-region from to glyph-str))))) +;;;###autoload +(defun tamil-composition-function (pos &optional string) + "Compose Tamil characters after the position POS. +If STRING is not nil, it is a string, and POS is an index to the string. +In this case, compose characters after POS of the string." + (if string + ;; Not yet implemented. + nil + (goto-char pos) + (if (looking-at tamil-composable-pattern) + (prog1 (match-end 0) + (tamil-compose-syllable-region pos (match-end 0)))))) + (provide 'tml-util) ;;; tml-util.el ends here diff --git a/lisp/language/utf-8-lang.el b/lisp/language/utf-8-lang.el index 11e51dbec2b..3aa845921ad 100644 --- a/lisp/language/utf-8-lang.el +++ b/lisp/language/utf-8-lang.el @@ -27,14 +27,15 @@ ;;; Code: (set-language-info-alist - "UTF-8" `((coding-system mule-utf-8) - (coding-priority mule-utf-8) - (setup-function - . (lambda () - ;; Use Unicode font under Windows. Jason Rumney fecit. - (if (and (fboundp 'w32-add-charset-info) - (not (boundp 'w32-unicode-charset-defined))) - (w32-add-charset-info "iso10646-1" 'w32-charset-ansi t)))) + "UTF-8" `((coding-system utf-8) + (coding-priority utf-8) +;; Presumably not relevant now. +;; (setup-function +;; . 
(lambda () +;; ;; Use Unicode font under Windows. Jason Rumney fecit. +;; (if (and (fboundp 'w32-add-charset-info) +;; (not (boundp 'w32-unicode-charset-defined))) +;; (w32-add-charset-info "iso10646-1" 'w32-charset-ansi t)))) ;; Is this appropriate? ;; (exit-function ;; . (lambda () @@ -45,8 +46,8 @@ ;; w32-charset-info-alist))))) (input-method . "rfc1345") ; maybe not the best choice (documentation . "\ -This language environment is a generic one for a subset of the Unicode -character set encoded in UTF-8.")) +This language environment is a generic one for the Unicode character set +encoded in UTF-8.")) nil) (provide 'utf-8-lang) diff --git a/lisp/language/viet-util.el b/lisp/language/viet-util.el index 1b72ff07d8b..04c26f14756 100644 --- a/lisp/language/viet-util.el +++ b/lisp/language/viet-util.el @@ -1,7 +1,10 @@ ;;; viet-util.el --- utilities for Vietnamese -*- coding: iso-2022-7bit; -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. +;; Licensed to the Free Software Foundation. +;; Copyright (C) 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 ;; Keywords: mule, multilingual, Vietnamese @@ -39,8 +42,7 @@ ;;;###autoload (defun viet-encode-viscii-char (char) "Return VISCII character code of CHAR if appropriate." - (aref (char-table-extra-slot viet-viscii-nonascii-translation-table 0) - char)) + (encode-char char 'viscii)) ;; VIQR is a mnemonic encoding specification for Vietnamese. ;; It represents diacritical marks by ASCII characters as follows: diff --git a/lisp/language/vietnamese.el b/lisp/language/vietnamese.el index 742ceafda69..8f403f112e0 100644 --- a/lisp/language/vietnamese.el +++ b/lisp/language/vietnamese.el @@ -1,9 +1,13 @@ ;;; vietnamese.el --- support for Vietnamese -*- coding: iso-2022-7bit; -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. 
+;; Licensed to the Free Software Foundation. +;; Copyright (C) 2002 Free Software Foundation, Inc. +;; Copyright (C) 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 -;; Keywords: multilingual, Vietnamese +;; Keywords: multilingual, Vietnamese, i18n ;; This file is part of GNU Emacs. @@ -29,242 +33,23 @@ ;;; Code: -(defvar viet-viscii-decode-table - [;; VISCII is a full 8-bit code. - 0 1 ?,2F(B 3 4 ?,2G(B ?,2g(B 7 8 9 10 11 12 13 14 15 - 16 17 18 19 ?,2V(B 21 22 23 24 ?,2[(B 26 27 28 29 ?,2\(B 31 - 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 - 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 - 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 - 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 - 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 - 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 - ?,2U(B ?,2!(B ?,2"(B ?,2#(B ?,2$(B ?,2%(B ?,2&(B ?,2'(B ?,2((B ?,2)(B ?,2*(B ?,2+(B ?,2,(B ?,2-(B ?,2.(B ?,2/(B - ?,20(B ?,21(B ?,22(B ?,25(B ?,2~(B ?,2>(B ?,26(B ?,27(B ?,28(B ?,2v(B ?,2w(B ?,2o(B ?,2|(B ?,2{(B ?,2x(B ?,2O(B - ?,2u(B ?,1!(B ?,1"(B ?,1#(B ?,1$(B ?,1%(B ?,1&(B ?,1'(B ?,1((B ?,1)(B ?,1*(B ?,1+(B ?,1,(B ?,1-(B ?,1.(B ?,1/(B - ?,10(B ?,11(B ?,12(B ?,2^(B ?,2=(B ?,15(B ?,16(B ?,17(B ?,18(B ?,2q(B ?,2Q(B ?,2W(B ?,2X(B ?,1=(B ?,1>(B ?,2_(B - ?,2`(B ?,2a(B ?,2b(B ?,2c(B ?,2d(B ?,2e(B ?,1F(B ?,1G(B ?,2h(B ?,2i(B ?,2j(B ?,2k(B ?,2l(B ?,2m(B ?,2n(B ?,1O(B - ?,2p(B ?,1Q(B ?,2r(B ?,2s(B ?,2t(B ?,1U(B ?,1V(B ?,1W(B ?,1X(B ?,2y(B ?,2z(B ?,1[(B ?,1\(B ?,2}(B ?,1^(B ?,1_(B - ?,1`(B ?,1a(B ?,1b(B ?,1c(B ?,1d(B ?,1e(B ?,1f(B ?,1g(B ?,1h(B ?,1i(B ?,1j(B ?,1k(B ?,1l(B ?,1m(B ?,1n(B ?,1o(B - ?,1p(B ?,1q(B ?,1r(B ?,1s(B ?,1t(B ?,1u(B ?,1v(B ?,1w(B ?,1x(B ?,1y(B ?,1z(B ?,1{(B ?,1|(B ?,1}(B ?,1~(B ?,2f(B ] - "Vietnamese VISCII decoding table.") - -(let ((table (make-translation-table-from-vector viet-viscii-decode-table))) - (define-translation-table 'viet-viscii-nonascii-translation-table 
table) - (define-translation-table 'viet-viscii-encode-table - (char-table-extra-slot table 0))) - -;;; -;;; VSCII is a pre-version of TCVN-5712 and deprecated -;;; -(defvar viet-vscii-decode-table - [;; VSCII is a full 8-bit code. - 0 ?,2z(B ?,2x(B 3 ?,2W(B ?,2X(B ?,2f(B 7 8 9 10 11 12 13 14 15 - 16 ?,2Q(B ?,2_(B ?,2O(B ?,2V(B ?,2[(B ?,2}(B ?,2\(B 24 25 26 27 28 29 30 31 - 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 - 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 - 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 - 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 - 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 - 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 - ?,2`(B ?,2d(B ?,2c(B ?,2a(B ?,2U(B ?,2#(B ?,2'(B ?,2h(B ?,2k(B ?,2((B ?,2i(B ?,2)(B ?,2.(B ?,2l(B ?,2o(B ?,2n(B - ?,2m(B ?,28(B ?,2r(B ?,2v(B ?,2u(B ?,2s(B ?,2w(B ?,25(B ?,26(B ?,27(B ?,2^(B ?,2>(B ?,2~(B ?,2y(B ?,2|(B ?,2{(B - 160 ?,2e(B ?,2b(B ?,2j(B ?,2t(B ?,2=(B ?,2_(B ?,2p(B ?,1e(B ?,1b(B ?,1j(B ?,1t(B ?,1=(B ?,1y(B ?,1p(B ?,2"(B - 192 193 194 195 196 ?,1`(B ?,1d(B ?,1c(B ?,1a(B ?,1U(B ?,2F(B ?,1"(B ?,1F(B ?,1G(B ?,1!(B ?,2G(B - ?,2!(B ?,2%(B ?,2&(B ?,2g(B ?,2%(B ?,2+(B ?,1#(B ?,1%(B ?,1&(B ?,1g(B ?,1$(B ?,1'(B ?,1h(B ?,2,(B ?,1k(B ?,1((B - ?,1i(B ?,1)(B ?,1+(B ?,1,(B ?,1-(B ?,1*(B ?,1.(B ?,1l(B ?,1o(B ?,2-(B ?,2*(B ?,20(B ?,1n(B ?,1m(B ?,18(B ?,1r(B - ?,21(B ?,1v(B ?,1u(B ?,1s(B ?,1w(B ?,10(B ?,11(B ?,12(B ?,1/(B ?,15(B ?,16(B ?,17(B ?,1^(B ?,1>(B ?,1~(B ?,1y(B - ?,22(B ?,1|(B ?,1{(B ?,1z(B ?,1x(B ?,1W(B ?,1X(B ?,1f(B ?,1Q(B ?,1q(B ?,1O(B ?,1V(B ?,1[(B ?,1}(B ?,1\(B ?,2/(B] - "Vietnamese VSCII decoding table.") - -(let ((table (make-translation-table-from-vector viet-vscii-decode-table))) - (define-translation-table 'viet-vscii-nonascii-translation-table table) - (define-translation-table 'viet-vscii-encode-table - (char-table-extra-slot table 0))) - -;; Does not support combining characters in the range [176, 180] -(defvar viet-tcvn-decode-table - [;; TCVN is a full 
8-bit code. - 0 ?,2z(B ?,2x(B 3 ?,2W(B ?,2X(B ?,2f(B 7 8 9 10 11 12 13 14 15 - 16 ?,2Q(B ?,2q(B ?,2O(B ?,2V(B ?,2[(B ?,2}(B ?,2\(B 24 25 26 27 28 29 30 31 - 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 - 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 - 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 - 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 - 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 - 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 - ?,2`(B ?,2d(B ?,2c(B ?,2a(B ?,2U(B ?,2#(B ?,2'(B ?,2h(B ?,2k(B ?,2((B ?,2i(B ?,2)(B ?,2.(B ?,2l(B ?,2o(B ?,2n(B - ?,2m(B ?,28(B ?,2r(B ?,2v(B ?,2u(B ?,2s(B ?,2w(B ?,25(B ?,26(B ?,27(B ?,2^(B ?,2>(B ?,2~(B ?,2y(B ?,2|(B ?,2{(B - 160 ?,2e(B ?,2b(B ?,2j(B ?,2t(B ?,2=(B ?,2_(B ?,2p(B ?,1e(B ?,1b(B ?,1j(B ?,1t(B ?,1=(B ?,1_(B ?,1p(B ?,2"(B - 176 177 178 179 180 ?,1`(B ?,1d(B ?,1c(B ?,1a(B ?,1U(B ?,2F(B ?,1"(B ?,1F(B ?,1G(B ?,1!(B ?,2G(B - ?,2!(B ?,2%(B ?,2&(B ?,2g(B ?,2$(B ?,2+(B ?,1#(B ?,1%(B ?,1&(B ?,1g(B ?,1$(B ?,1'(B ?,1h(B ?,2,(B ?,1k(B ?,1((B - ?,1i(B ?,1)(B ?,1+(B ?,1,(B ?,1-(B ?,1*(B ?,1.(B ?,1l(B ?,1o(B ?,2-(B ?,2*(B ?,20(B ?,1n(B ?,1m(B ?,18(B ?,1r(B - ?,21(B ?,1v(B ?,1u(B ?,1s(B ?,1w(B ?,10(B ?,11(B ?,12(B ?,1/(B ?,15(B ?,16(B ?,17(B ?,1^(B ?,1>(B ?,1~(B ?,1y(B - ?,22(B ?,1|(B ?,1{(B ?,1z(B ?,1x(B ?,1W(B ?,1X(B ?,1f(B ?,1Q(B ?,1q(B ?,1O(B ?,1V(B ?,1[(B ?,1}(B ?,1\(B ?,2/(B] - "Vietnamese TCVN-5712 decoding table.") - -(let ((table (make-translation-table-from-vector viet-tcvn-decode-table))) - (define-translation-table 'viet-tcvn-nonascii-translation-table table) - (define-translation-table 'viet-tcvn-encode-table - (char-table-extra-slot table 0))) - -;; (defvar viet-vps-decode-table -;; [;; VPS is a full 8-bit code. 
-;; 0 1 ?,2U(B ?,2'(B ?,2#(B ?,2)(B ?,2.(B 7 8 9 10 11 12 13 14 15 -;; ?,28(B ?,2w(B ?,25(B ?,2~(B ?,2x(B ?,2q(B 22 23 24 ?,2\(B 26 27 ?,2g(B ?,2f(B 30 31 -;; 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 -;; 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 -;; 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 -;; 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 -;; 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 -;; 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 -;; ?,2`(B ?,2d(B ?,2c(B ?,2$(B ?,2%(B ?,2&(B ?,1w(B ?,12(B ?,2e(B ?,1*(B ?,1+(B ?,1,(B ?,1.(B ?,2!(B ?,2"(B ?,2F(B -;; ?,2*(B 145 146 ?,2+(B ?,2,(B ?,2-(B ?,2/(B ?,20(B ?,21(B ?,22(B ?,1}(B ?,1V(B ?,1\(B ?,2>(B ?,26(B ?,27(B -;; 160 ?,1!(B ?,1"(B ?,1F(B ?,1G(B ?,1#(B ?,2^(B ?,1>(B ?,2y(B ?,16(B ?,17(B ?,1^(B ?,2{(B ?,2Q(B ?,1~(B ?,2W(B -;; ?,11(B ?,2X(B ?,2O(B ?,2[(B ?,2m(B ?,2l(B ?,15(B ?,2o(B ?,2n(B ?,2s(B ?,1X(B ?,1f(B ?,2r(B ?,2v(B ?,2u(B ?,1q(B -;; ?,1%(B ?,2a(B ?,2b(B ?,1$(B ?,1&(B ?,1g(B ?,1'(B ?,1p(B ?,1k(B ?,2i(B ?,2j(B ?,1)(B ?,1o(B ?,1-(B ?,18(B ?,1[(B -;; ?,2_(B ?,2|(B ?,10(B ?,1/(B ?,2t(B ?,1v(B ?,1=(B ?,2h(B ?,1W(B ?,1Q(B ?,2z(B ?,1{(B ?,1_(B ?,2}(B ?,2k(B 223 -;; ?,1`(B ?,1a(B ?,1b(B ?,1c(B ?,1d(B ?,1U(B ?,1e(B 231 ?,1h(B ?,1i(B ?,1j(B ?,1((B ?,1l(B ?,1m(B 238 ?,1n(B -;; ?,2G(B ?,2p(B ?,1r(B ?,1s(B ?,1t(B ?,1u(B 246 ?,2=(B ?,1x(B ?,1y(B ?,1z(B ?,1|(B 252 ?,2V(B ?,2((B ?,1O(B] -;; "Vietnamese VPS decoding table.") -;; -;; (let ((table (make-translation-table-from-vector viet-vps-decode-table))) -;; (define-translation-table 'viet-vps-nonascii-translation-table table) -;; (define-translation-table 'viet-vps-encode-table -;; (char-table-extra-slot table 0))) - -(define-ccl-program ccl-decode-viscii - `(3 - ((loop - (r0 = 0) - (read r1) - (translate-character viet-viscii-nonascii-translation-table r0 r1) - (write-multibyte-character r0 r1) - (repeat)))) - "CCL program to decode VISCII 1.1") - -(define-ccl-program ccl-encode-viscii - `(1 - ((loop - 
(read-multibyte-character r0 r1) - (translate-character viet-viscii-encode-table r0 r1) - (write-repeat r1)))) - "CCL program to encode VISCII 1.1") - -(define-ccl-program ccl-encode-viscii-font - `(0 - ;; In: R0:vietnamese-viscii-lower/vietnamese-viscii-upper - ;; R1:position code - ;; Out: R1:font code point - (translate-character viet-viscii-encode-table r0 r1)) - "CCL program to encode Vietnamese chars to VISCII 1.1 font") - -(define-ccl-program ccl-decode-vscii - `(3 - ((loop - (r0 = 0) - (read r1) - (translate-character viet-vscii-nonascii-translation-table r0 r1) - (write-multibyte-character r0 r1) - (repeat)))) - "CCL program to decode VSCII-1.") - -(define-ccl-program ccl-encode-vscii - `(1 - ((loop - (read-multibyte-character r0 r1) - (translate-character viet-vscii-encode-table r0 r1) - (write-repeat r1)))) - "CCL program to encode VSCII-1.") - -(define-ccl-program ccl-encode-vscii-font - `(0 - ;; In: R0:vietnamese-viscii-lower/vietnamese-viscii-upper - ;; R1:position code - ;; Out: R1:font code point - (translate-character viet-vscii-encode-table r0 r1)) - "CCL program to encode Vietnamese chars to VSCII-1 font.") - -(define-ccl-program ccl-decode-tcvn - `(3 - ((loop - (r0 = 0) - (read r1) - (translate-character viet-tcvn-nonascii-translation-table r0 r1) - (write-multibyte-character r0 r1) - (repeat)))) - "CCL program to decode TCVN-5712.") - -(define-ccl-program ccl-encode-tcvn - `(1 - ((loop - (read-multibyte-character r0 r1) - (translate-character viet-tcvn-encode-table r0 r1) - (write-repeat r1)))) - "CCL program to encode TCVN-5712.") - -(define-ccl-program ccl-encode-tcvn-font - `(0 - ;; In: R0:vietnamese-viscii-lower/vietnamese-viscii-upper - ;; R1:position code - ;; Out: R1:font code point - (translate-character viet-tcvn-encode-table r0 r1)) - "CCL program to encode Vietnamese chars to TCVN-5712 font.") - -;; (define-ccl-program ccl-decode-vps -;; `(3 -;; ((loop -;; (r0 = 0) -;; (read r1) -;; (translate-character 
viet-vps-nonascii-translation-table r0 r1) -;; (write-multibyte-character r0 r1) -;; (repeat)))) -;; "CCL program to decode VPS.") -;; -;; (define-ccl-program ccl-encode-vps -;; `(1 -;; ((loop -;; (read-multibyte-character r0 r1) -;; (translate-character viet-vps-encode-table r0 r1) -;; (write-repeat r1)))) -;; "CCL program to encode VPS.") -;; -;; (define-ccl-program ccl-encode-vps-font -;; `(0 -;; ;; In: R0:vietnamese-viscii-lower/vietnamese-viscii-upper -;; ;; R1:position code -;; ;; Out: R1:font code point -;; (translate-character viet-vps-encode-table r0 r1)) -;; "CCL program to encode Vietnamese chars to VPS font.") - -(make-coding-system - 'vietnamese-viscii 4 ?V - "8-bit encoding for Vietnamese VISCII 1.1 (MIME:VISCII)" - '(ccl-decode-viscii . ccl-encode-viscii) - '((safe-charsets ascii vietnamese-viscii-lower vietnamese-viscii-upper) - (mime-charset . viscii) - (valid-codes (0 . 255)))) +(define-coding-system 'vietnamese-viscii + "8-bit encoding for Vietnamese VISCII 1.1 (MIME:VISCII)." + :coding-type 'charset + :mnemonic ?V + :charset-list '(viscii) + :mime-charset 'viscii) (define-coding-system-alias 'viscii 'vietnamese-viscii) -(make-coding-system - 'vietnamese-vscii 4 ?v - "8-bit encoding for Vietnamese VSCII-1" - '(ccl-decode-vscii . ccl-encode-vscii) - '((safe-charsets ascii vietnamese-viscii-lower vietnamese-viscii-upper) - (valid-codes (0 . 255)))) +(define-coding-system 'vietnamese-vscii + "8-bit encoding for Vietnamese VSCII-1." + :coding-type 'charset + :mnemonic ?v + :charset-list '(vscii)) (define-coding-system-alias 'vscii 'vietnamese-vscii) -(make-coding-system - 'vietnamese-tcvn 4 ?t - "8-bit encoding for Vietnamese TCVN-5712" - '(ccl-decode-tcvn . ccl-encode-tcvn) - '((safe-charsets ascii vietnamese-viscii-lower vietnamese-viscii-upper) - (valid-codes (0 . 
255)))) - -(define-coding-system-alias 'tcvn 'vietnamese-tcvn) - ;; (make-coding-system ;; 'vietnamese-vps 4 ?p ;; "8-bit encoding for Vietnamese VPS" @@ -274,36 +59,21 @@ ;; ;; (define-coding-system-alias 'vps 'vietnamese-vps) -(make-coding-system - 'vietnamese-viqr 0 ?q - "Vietnamese latin transcription (VIQR)" - nil - '((safe-charsets ascii vietnamese-viscii-lower vietnamese-viscii-upper) - (post-read-conversion . viqr-post-read-conversion) - (pre-write-conversion . viqr-pre-write-conversion) - (charset-origin-alist - (vietnamese-viscii-lower "VISCII" viet-encode-viscii-char) - (vietnamese-viscii-upper "VISCII" viet-encode-viscii-char)))) - +(define-coding-system 'vietnamese-viqr + "Vietnamese latin transcription (VIQR)." + :coding-type 'utf-8 + :mnemonic ?q + :charset-list '(ascii viscii) + :post-read-conversion 'viqr-post-read-conversion + :pre-write-conversion 'viqr-pre-write-conversion) (define-coding-system-alias 'viqr 'vietnamese-viqr) -(setq font-ccl-encoder-alist - (cons '("viscii" . ccl-encode-viscii-font) font-ccl-encoder-alist)) - -(setq font-ccl-encoder-alist - (cons '("vscii" . ccl-encode-vscii-font) font-ccl-encoder-alist)) - -(setq font-ccl-encoder-alist - (cons '("tcvn" . ccl-encode-tcvn-font) font-ccl-encoder-alist)) - (set-language-info-alist - "Vietnamese" `((charset vietnamese-viscii-lower vietnamese-viscii-upper) - (nonascii-translation - . ,(get 'viet-viscii-nonascii-translation-table - 'translation-table)) - (coding-system vietnamese-viscii vietnamese-vscii vietnamese-tcvn - vietnamese-viqr) + "Vietnamese" `((charset viscii) + (coding-system vietnamese-viscii vietnamese-vscii + vietnamese-tcvn vietnamese-viqr windows-1258) + (nonascii-translation . viscii) (coding-priority vietnamese-viscii) (input-method . "vietnamese-viqr") (unibyte-display . vietnamese-viscii) @@ -311,13 +81,27 @@ (sample-text . "Vietnamese (Ti,1*(Bng Vi,1.(Bt) Ch,1`(Bo b,1U(Bn") (documentation . "\ For Vietnamese, Emacs uses special charsets internally. 
-They can be decoded from and encoded to VISCII, VSCII, TCVN-5712, and -VIQR. VSCII is deprecated in favour of TCVN-5712. Current setting -puts higher priority to the coding system VISCII than TCVN-5712. If -you prefer TCVN-5712, please do: (prefer-coding-system 'vietnamese-tcvn). -There are two Vietnamese input methods: VIQR and Telex, VIQR is the -default setting.") - )) +They can be decoded from and encoded to VISCII, VSCII, TCVN-5712, VIQR +and windows-1258. VSCII is deprecated in favour of TCVN-5712. The +current setting gives higher priority to the coding system VISCII than +TCVN-5712. If you prefer TCVN-5712, please do: (prefer-coding-system +'vietnamese-tcvn). There are two Vietnamese input methods: VIQR and +Telex; VIQR is the default setting."))) + +(define-coding-system 'windows-1258 + "windows-1258 encoding for Vietnamese (MIME: WINDOWS-1258)" + :coding-type 'charset + :mnemonic ?* + :charset-list '(windows-1258) + :mime-charset 'windows-1258) +(define-coding-system-alias 'cp1258 'windows-1258) + +(define-coding-system 'vietnamese-tcvn + "8-bit encoding for Vietnamese TCVN-5712" + :coding-type 'charset + :mnemonic ?t + :charset-list '(tcvn-5712)) +(define-coding-system-alias 'tcvn 'vietnamese-tcvn) (provide 'vietnamese) |