summaryrefslogtreecommitdiff
path: root/lisp/language/lao-util.el
diff options
context:
space:
mode:
authorKenichi Handa <handa@m17n.org>1999-12-15 00:47:53 +0000
committerKenichi Handa <handa@m17n.org>1999-12-15 00:47:53 +0000
commit31ce771921c9c60c7893286693afad50606dbb98 (patch)
treee81030fc1f06dea243aad4f39e406279d67439f8 /lisp/language/lao-util.el
parent86e2a80397cd08feef069795522a64b6e8834d2f (diff)
downloademacs-31ce771921c9c60c7893286693afad50606dbb98.tar.gz
Mostly rewritten.
Diffstat (limited to 'lisp/language/lao-util.el')
-rw-r--r--lisp/language/lao-util.el382
1 files changed, 377 insertions, 5 deletions
diff --git a/lisp/language/lao-util.el b/lisp/language/lao-util.el
index cb78b7fc067..b12e63ae13d 100644
--- a/lisp/language/lao-util.el
+++ b/lisp/language/lao-util.el
@@ -30,6 +30,14 @@
(interactive)
(set-language-environment "Lao"))
+;; Setting information of Thai characters.
+
+(defconst lao-category-table (make-category-table))
+(define-category ?c "Lao consonant" lao-category-table)
+(define-category ?s "Lao semi-vowel" lao-category-table)
+(define-category ?v "Lao upper/lower vowel" lao-category-table)
+(define-category ?t "Lao tone" lao-category-table)
+
(let ((l '((?(1!(B consonant "LETTER KOR KAI'" "CHICKEN")
(?(1"(B consonant "LETTER KHOR KHAI'" "EGG")
(?(1#(B invalid nil)
@@ -127,11 +135,375 @@
))
elm)
(while l
- (setq elm (car l))
- (put-char-code-property (car elm) 'phonetic-type (car (cdr elm)))
- (put-char-code-property (car elm) 'name (nth 2 elm))
- (put-char-code-property (car elm) 'meaning (nth 3 elm))
- (setq l (cdr l))))
+ (setq elm (car l) l (cdr l))
+ (let ((char (car elm))
+ (ptype (nth 1 elm)))
+ (cond ((eq ptype 'consonant)
+ (modify-category-entry char ?c lao-category-table))
+ ((memq ptype '(vowel-upper vowel-lower))
+ (modify-category-entry char ?v lao-category-table))
+ ((eq ptype 'semivowel-lower)
+ (modify-category-entry char ?s lao-category-table))
+ ((eq ptype 'tone)
+ (modify-category-entry char ?t lao-category-table)))
+ (put-char-code-property char 'phonetic-type ptype)
+ (put-char-code-property char 'name (nth 2 elm))
+ (put-char-code-property char 'meaning (nth 3 elm)))))
+
+;; The general composing rules are as follows:
+;;
+;; T
+;; V T V T
+;; CV -> C, CT -> C, CVT -> C, Cv -> C, CvT -> C
+;; v v
+;; T
+;; V T V T
+;; CsV -> C, CsT -> C, CsVT -> C, Csv -> C, CvT -> C
+;; s s s s s
+;; v v
+
+
+;; where C: consonant, V: vowel upper, v: vowel lower,
+;; T: tone mark, s: semivowel lower
+
+(defvar lao-composition-pattern
+ "\\cc\\(\\ct\\|\\cv\\ct?\\|\\cs\\(\\ct\\|\\cv\\ct?\\)?\\)"
+ "Regular expression matching a Lao composite sequence.")
+
+;;;###autoload
+(defun lao-compose-string (str)
+ (with-category-table lao-category-table
+ (let ((idx 0))
+ (while (setq idx (string-match lao-composition-pattern str idx))
+ (compose-string str idx (match-end 0))
+ (setq idx (match-end 0))))
+ str))
+
+;;; LRT: Lao <-> Roman Transcription
+
+;; Upper vowels and tone-marks are put on the letter.
+;; Semi-vowel-sign-lo and lower vowels are put under the letter.
+
+(defconst lao-transcription-consonant-alist
+ (sort '(;; single consonants
+ ("k" . "(1!(B")
+ ("kh" . "(1"(B")
+ ("qh" . "(1$(B")
+ ("ng" . "(1'(B")
+ ("j" . "(1((B")
+ ("s" . "(1J(B")
+ ("x" . "(1*(B")
+ ("y" . "(1-(B")
+ ("d" . "(14(B")
+ ("t" . "(15(B")
+ ("th" . "(16(B")
+ ("dh" . "(17(B")
+ ("n" . "(19(B")
+ ("b" . "(1:(B")
+ ("p" . "(1;(B")
+ ("hp" . "(1<(B")
+ ("fh" . "(1=(B")
+ ("ph" . "(1>(B")
+ ("f" . "(1?(B")
+ ("m" . "(1A(B")
+ ("gn" . "(1B(B")
+ ("l" . "(1E(B")
+ ("r" . "(1C(B")
+ ("v" . "(1G(B")
+ ("w" . "(1G(B")
+ ("hh" . "(1K(B")
+ ("O" . "(1M(B")
+ ("h" . "(1N(B")
+ ("nh" . "(1|(B")
+ ("mh" . "(1}(B")
+ ("lh" . ["(1K\(B"])
+ ;; double consonants
+ ("ngh" . ["(1K'(B"])
+ ("yh" . ["(1K](B"])
+ ("wh" . ["(1KG(B"])
+ ("hl" . ["(1KE(B"])
+ ("hy" . ["(1K-(B"])
+ ("hn" . ["(1K9(B"])
+ ("hm" . ["(1KA(B"])
+ )
+ (function (lambda (x y) (> (length (car x)) (length (car y)))))))
+
+(defconst lao-transcription-semi-vowel-alist
+ '(("r" . "(1\(B")))
+
+(defconst lao-transcription-vowel-alist
+ (sort '(("a" . "(1P(B")
+ ("ar" . "(1R(B")
+ ("i" . "(1T(B")
+ ("ii" . "(1U(B")
+ ("eu" . "(1V(B")
+ ("ur" . "(1W(B")
+ ("u" . "(1X(B")
+ ("uu" . "(1Y(B")
+ ("e" . ["(1`P(B"])
+ ("ee" . "(1`(B")
+ ("ae" . ["(1aP(B"])
+ ("aa" . "(1a(B")
+ ("o" . ["(1bP(B"])
+ ("oo" . "(1b(B")
+ ("oe" . ["(1`RP(B"])
+ ("or" . "(1m(B")
+ ("er" . ["(1`T(B"])
+ ("ir" . ["(1`U(B"])
+ ("ua" . ["(1[GP(B"])
+ ("uaa" . ["(1[G(B"])
+ ("ie" . ["(1`Q]P(B"])
+ ("ia" . ["(1`Q](B"])
+ ("ea" . ["(1`VM(B"])
+ ("eaa" . ["(1`WM(B"])
+ ("ai" . "(1d(B")
+ ("ei" . "(1c(B")
+ ("ao" . ["(1`[R(B"])
+ ("aM" . "(1S(B"))
+ (function (lambda (x y) (> (length (car x)) (length (car y)))))))
+
+;; Maa-sakod is put at the tail.
+(defconst lao-transcription-maa-sakod-alist
+ '(("k" . "(1!(B")
+ ("g" . "(1'(B")
+ ("y" . "(1-(B")
+ ("d" . "(14(B")
+ ("n" . "(19(B")
+ ("b" . "(1:(B")
+ ("m" . "(1A(B")
+ ("v" . "(1G(B")
+ ("w" . "(1G(B")
+ ))
+
+(defconst lao-transcription-tone-alist
+ '(("'" . "(1h(B")
+ ("\"" . "(1i(B")
+ ("^" . "(1j(B")
+ ("+" . "(1k(B")
+ ("~" . "(1l(B")))
+
+(defconst lao-transcription-punctuation-alist
+ '(("\\0" . "(1p(B")
+ ("\\1" . "(1q(B")
+ ("\\2" . "(1r(B")
+ ("\\3" . "(1s(B")
+ ("\\4" . "(1t(B")
+ ("\\5" . "(1u(B")
+ ("\\6" . "(1v(B")
+ ("\\7" . "(1w(B")
+ ("\\8" . "(1x(B")
+ ("\\9" . "(1y(B")
+ ("\\\\" . "(1f(B")
+ ("\\$" . "(1O(B")))
+
+(defconst lao-transcription-pattern
+ (concat
+ "\\("
+ (mapconcat 'car lao-transcription-consonant-alist "\\|")
+ "\\)\\("
+ (mapconcat 'car lao-transcription-semi-vowel-alist "\\|")
+ "\\)?\\(\\("
+ (mapconcat 'car lao-transcription-vowel-alist "\\|")
+ "\\)\\("
+ (mapconcat 'car lao-transcription-maa-sakod-alist "\\|")
+ "\\)?\\("
+ (mapconcat (lambda (x) (regexp-quote (car x)))
+ lao-transcription-tone-alist "\\|")
+ "\\)?\\)?\\|"
+ (mapconcat (lambda (x) (regexp-quote (car x)))
+ lao-transcription-punctuation-alist "\\|")
+ )
+ "Regexp of Roman transcription pattern for one Lao syllable.")
+
+(defconst lao-transcription-pattern
+ (concat
+ "\\("
+ (regexp-opt (mapcar 'car lao-transcription-consonant-alist))
+ "\\)\\("
+ (regexp-opt (mapcar 'car lao-transcription-semi-vowel-alist))
+ "\\)?\\(\\("
+ (regexp-opt (mapcar 'car lao-transcription-vowel-alist))
+ "\\)\\("
+ (regexp-opt (mapcar 'car lao-transcription-maa-sakod-alist))
+ "\\)?\\("
+ (regexp-opt (mapcar 'car lao-transcription-tone-alist))
+ "\\)?\\)?\\|"
+ (regexp-opt (mapcar 'car lao-transcription-punctuation-alist))
+ )
+ "Regexp of Roman transcription pattern for one Lao syllable.")
+
+(defconst lao-vowel-reordering-rule
+ '(("(1P(B" (0 ?(1P(B) (0 ?(1Q(B))
+ ("(1R(B" (0 ?(1R(B))
+ ("(1T(B" (0 ?(1U(B))
+ ("(1U(B" (0 ?(1U(B))
+ ("(1V(B" (0 ?(1V(B))
+ ("(1W(B" (0 ?(1W(B))
+ ("(1X(B" (0 ?(1X(B))
+ ("(1Y(B" (0 ?(1Y(B))
+ ("(1`P(B" (?(1`(B 0 ?(1P(B) (?(1`(B 0 ?(1Q(B))
+ ("(1`(B" (?(1`(B 0))
+ ("(1aP(B" (?(1a(B 0 ?(1P(B) (?(1a(B 0 ?(1Q(B))
+ ("(1a(B" (?(1a(B 0))
+ ("(1bP(B" (?(1b(B 0 ?(1P(B) (0 ?(1[(B) (?(1-(B ?(1b(B 0 ?(1Q(B) (?(1G(B ?(1b(B 0 ?(1Q(B))
+ ("(1b(B" (?(1b(B 0))
+ ("(1`RP(B" (?(1`(B 0 ?(1R(B ?(1P(B) (0 ?(1Q(B ?(1M(B))
+ ("(1m(B" (0 ?(1m(B) (0 ?(1M(B))
+ ("(1`T(B" (?(1`(B 0 ?(1T(B))
+ ("(1`U(B" (?(1`(B 0 ?(1U(B))
+ ("(1[GP(B" (0 ?(1[(B ?(1G(B ?(1P(B) (0 ?(1Q(B ?(1G(B))
+ ("(1[G(B" (0 ?(1[(B ?(1G(B) (0 ?(1G(B))
+ ("(1`Q]P(B" (?(1`(B 0 ?(1Q(B ?(1](B ?(1P(B) (0 ?(1Q(B ?(1](B))
+ ("(1`Q](B" (?(1`(B 0 ?(1Q(B ?(1](B) (0 ?(1](B))
+ ("(1`VM(B" (?(1`(B 0 ?(1V(B ?(1M(B))
+ ("(1`WM(B" (?(1`(B 0 ?(1W(B ?(1M(B))
+ ("(1d(B" (?(1d(B 0))
+ ("(1c(B" (?(1c(B 0))
+ ("(1`[R(B" (?(1`(B 0 ?(1[(B ?(1R(B))
+ ("(1S(B" (0 ?(1S(B)))
+ "Alist of Lao vowel string vs the corresponding re-ordering rule.
+Each element has this form:
+ (VOWEL NO-MAA-SAKOD-RULE WITH-MAA-SAKOD-RULE (MAA-SAKOD-0 RULE-0) ...)
+
+VOWEL is a vowel string (e.g. \"(1`Q]P(B\").
+
+NO-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL following a
+consonant. It is a list vowel characters or 0. The element 0
+indicate the place to embed a consonant.
+
+Optional WITH-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL
+follwoing a consonant and preceding a maa-sakod character. If it is
+nil, NO-MAA-SAKOD-RULE is used. The maa-sakod character is alwasy
+appended at the tail.
+
+For instance, rule `(\"(1`WM(B\" (?(1`(B t ?(1W(B ?(1M(B))' tells that this vowel
+string following a consonant `(1!(B' should be re-ordered as \"(1`!WM(B\".
+
+Optional (MAA-SAKOD-n RULE-n) are rules specially applied to maa-sakod
+character MAA-SAKOD-n.")
+
+;;;###autoload
+(defun lao-transcribe-single-roman-syllable-to-lao (from to &optional str)
+ "Transcribe a Romanized Lao syllable in the region FROM and TO to Lao string.
+Only the first syllable is transcribed.
+The value has the form: (START END LAO-STRING), where
+START and END are the beggining and end positions of the Roman Lao syllable,
+LAO-STRING is the Lao character transcription of it.
+
+Optional 3rd arg STR, if non-nil, is a string to search for Roman Lao
+syllable. In that case, FROM and TO are indexes to STR."
+ (if str
+ (if (setq from (string-match lao-transcription-pattern str from))
+ (progn
+ (if (>= from to)
+ (setq from nil)
+ (setq to (match-end 0)))))
+ (save-excursion
+ (goto-char from)
+ (if (setq to (re-search-forward lao-transcription-pattern to t))
+ (setq from (match-beginning 0))
+ (setq from nil))))
+ (if from
+ (let* ((consonant (match-string 1 str))
+ (semivowel (match-string 3 str))
+ (vowel (match-string 5 str))
+ (maa-sakod (match-string 8 str))
+ (tone (match-string 9 str))
+ lao-consonant lao-semivowel lao-vowel lao-maa-sakod lao-tone
+ clen cidx)
+ (setq to (match-end 0))
+ (if (not consonant)
+ (setq str (cdr (assoc (match-string 0 str)
+ lao-transcription-punctuation-alist)))
+ (setq lao-consonant
+ (cdr (assoc consonant lao-transcription-consonant-alist)))
+ (if (vectorp lao-consonant)
+ (setq lao-consonant (aref lao-consonant 0)))
+ (setq clen (length lao-consonant))
+ (if semivowel
+ ;; Include semivowel in STR.
+ (setq lao-semivowel
+ (cdr (assoc semivowel lao-transcription-semi-vowel-alist))
+ str (if (= clen 1)
+ (concat lao-consonant lao-semivowel)
+ (concat (substring lao-consonant 0 1) lao-semivowel
+ (substring lao-consonant 1))))
+ (setq str lao-consonant))
+ (if vowel
+ (let (rule)
+ (setq lao-vowel
+ (cdr (assoc vowel lao-transcription-vowel-alist)))
+ (if (vectorp lao-vowel)
+ (setq lao-vowel (aref lao-vowel 0)))
+ (setq rule (assoc lao-vowel lao-vowel-reordering-rule))
+ (if (null maa-sakod)
+ (setq rule (nth 1 rule))
+ (setq lao-maa-sakod
+ (cdr (assoc maa-sakod lao-transcription-maa-sakod-alist))
+ rule
+ (or (cdr (assq (aref lao-maa-sakod 0) (nthcdr 2 rule)))
+ (nth 2 rule)
+ (nth 1 rule))))
+ (or rule
+ (error "Lao vowel %S has no re-ordering rule" lao-vowel))
+ (setq lao-consonant str str "")
+ (while rule
+ (if (= (car rule) 0)
+ (setq str (concat str lao-consonant)
+ cidx (length str))
+ (setq str (concat str (list (car rule)))))
+ (setq rule (cdr rule)))
+ (or cidx
+ (error "Lao vowel %S has malformed re-ordering rule" vowel))
+ ;; Set CIDX to after upper or lower vowel if any.
+ (let ((len (length str)))
+ (while (and (< cidx len)
+ (memq (get-char-code-property (aref str cidx)
+ 'phonetic-type)
+ '(vowel-lower vowel-upper)))
+ (setq cidx (1+ cidx))))
+ (if lao-maa-sakod
+ (setq str (concat str lao-maa-sakod)))
+ (if tone
+ (setq lao-tone
+ (cdr (assoc tone lao-transcription-tone-alist))
+ str (concat (substring str 0 cidx) lao-tone
+ (substring str cidx)))))))
+ (list from to (lao-compose-string str)))))
+
+;;;###autoload
+(defun lao-transcribe-roman-to-lao-string (str)
+ "Transcribe Romanized Lao string STR to Lao character string."
+ (let ((from 0)
+ (to (length str))
+ (lao-str "")
+ val)
+ (while (setq val (lao-transcribe-single-roman-syllable-to-lao from to str))
+ (let ((start (car val))
+ (end (nth 1 val))
+ (lao (nth 2 val)))
+ (if (> start from)
+ (setq lao-str (concat lao-str (substring str from start) lao))
+ (setq lao-str (concat lao-str lao)))
+ (setq from end)))
+ (if (< from to)
+ (concat lao-str (substring str from to))
+ lao-str)))
+
+;;;###autoload
+(defun lao-composition-function (from to pattern &optional string)
+ "Compose Lao text in the region FROM and TO.
+The text matches the regular expression PATTERN.
+Optional 4th argument STRING, if non-nil, is a string containing text
+to compose.
+
+The return value is number of composed characters."
+ (if (< (1+ from) to)
+ (prog1 (- to from)
+ (if string
+ (compose-string from to)
+ (compose-region from to))
+ (- to from))))
;;
(provide 'lao-util)