summary | refs | log | tree | commit | diff
path: root/lisp/international
diff options
context:
space:
mode:
author: Eli Zaretskii <eliz@gnu.org> 2015-06-18 15:06:53 +0300
committer: Eli Zaretskii <eliz@gnu.org> 2015-06-18 15:06:53 +0300
commit: c4782ea5a7c5a63b6e004aefbb3b3898d3846ad5 (patch)
tree: f1651ec11ac7dbcb660883a655fdede2ce2b7235 /lisp/international
parent: 517ab6238a362bd0dda9af14681f5590da1df6de (diff)
download: emacs-c4782ea5a7c5a63b6e004aefbb3b3898d3846ad5.tar.gz
Improve and extend filepos-to-bufferpos
* lisp/international/mule-util.el (filepos-to-bufferpos--dos): Don't barf if F returns nil for some argument. (filepos-to-bufferpos): Expand to support UTF-16 and not assume that every encoding of type 'charset' is single-byte.
Diffstat (limited to 'lisp/international')
-rw-r--r-- lisp/international/mule-util.el 36
1 files changed, 34 insertions, 2 deletions
diff --git a/lisp/international/mule-util.el b/lisp/international/mule-util.el
index 2a53e40b4fd..bbefdaa7324 100644
--- a/lisp/international/mule-util.el
+++ b/lisp/international/mule-util.el
@@ -320,6 +320,12 @@ per-character basis, this may not be accurate."
(while
(progn
(setq pos (funcall f (- byte eol-offset)))
+ ;; Protect against accidental values of BYTE outside of the
+ ;; valid region.
+ (when (null pos)
+ (if (<= byte eol-offset)
+ (setq pos (point-min))
+ (setq pos (point-max))))
;; Adjust POS for DOS EOL format.
(setq lines (1- (line-number-at-pos pos)))
(and (not (= lines eol-offset)) (> omax omin)))
@@ -345,7 +351,25 @@ QUALITY can be:
(unless coding-system (setq coding-system buffer-file-coding-system))
(let ((eol (coding-system-eol-type coding-system))
(type (coding-system-type coding-system))
+ (base (coding-system-base coding-system))
(pm (save-restriction (widen) (point-min))))
+ (and (eq type 'utf-8-emacs)
+ (setq type 'utf-8))
+ (and (eq type 'utf-8)
+ ;; Any post-read/pre-write conversions mean it's not really UTF-8.
+ (not (null (coding-system-get coding-system :pos-read-conversion)))
+ (setq type 'not-utf-8))
+ (and (not (eq type 'utf-8))
+ (eq quality 'exact)
+ (setq type 'use-exact))
+ (and (memq type '(charset raw-text undecided))
+ ;; The following are all of type 'charset', but they are
+ ;; actually variable-width encodings.
+ (not (memq base '(chinese-gbk chinese-gb18030 euc-tw euc-jis-2004
+ korean-iso-8bit chinese-iso-8bit
+ japanese-iso-8bit chinese-big5-hkscs
+ japanese-cp932 korean-cp949)))
+ (setq type 'single-byte))
(pcase type
(`utf-8
(when (coding-system-get coding-system :bom)
@@ -353,8 +377,16 @@ QUALITY can be:
(if (= eol 1)
(filepos-to-bufferpos--dos (+ pm byte) #'byte-to-position)
(byte-to-position (+ pm byte))))
- ;; FIXME: What if it's a 2-byte charset? Are there such beasts?
- (`charset
+ (`utf-16
+ ;; Account for BOM, which is always 2 bytes in UTF-16.
+ (setq byte (- byte 2))
+ ;; In approximate mode, assume all characters are within the
+ ;; BMP, i.e. take up 2 bytes.
+ (setq byte (/ byte 2))
+ (if (= eol 1)
+ (filepos-to-bufferpos--dos (+ pm byte) #'byte-to-position)
+ (byte-to-position (+ pm byte))))
+ (`single-byte
(if (= eol 1)
(filepos-to-bufferpos--dos (+ pm byte) #'identity)
(+ pm byte)))