diff options
author | Eli Zaretskii <eliz@gnu.org> | 2017-12-15 11:06:07 +0200 |
---|---|---|
committer | Eli Zaretskii <eliz@gnu.org> | 2017-12-15 11:06:07 +0200 |
commit | 889f07c352f7e0deccf59353a60a45f2716551d8 (patch) | |
tree | 91d6574814bd5359b7498f747389f6ab00763536 | |
parent | a2697fac0ec0d4dd915b619bb76792121514acfa (diff) | |
download | emacs-889f07c352f7e0deccf59353a60a45f2716551d8.tar.gz |
Better support utf-8-with-signature and utf-8-hfs in XML/HTML
* lisp/international/mule.el (sgml-xml-auto-coding-function):
Support UTF-8 with BOM and utf-8-hfs as variants of UTF-8, and
obey the buffer's encoding if it is one of these variants, instead
of re-encoding in UTF-8 proper. (Bug#20623)
-rw-r--r-- | lisp/international/mule.el | 15 |
1 files changed, 13 insertions, 2 deletions
```diff
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index 857fa800eb4..81c04db90e9 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -2493,7 +2493,17 @@ This function is intended to be added to `auto-coding-functions'."
             (let* ((match (match-string 1))
                    (sym (intern (downcase match))))
               (if (coding-system-p sym)
-                  sym
+                  ;; If the encoding tag is UTF-8 and the buffer's
+                  ;; encoding is one of the variants of UTF-8, use the
+                  ;; buffer's encoding. This allows, e.g., saving an
+                  ;; XML file as UTF-8 with BOM when the tag says UTF-8.
+                  (let ((sym-type (coding-system-type sym))
+                        (bfcs-type
+                         (coding-system-type buffer-file-coding-system)))
+                    (if (and (coding-system-equal 'utf-8 sym-type)
+                             (coding-system-equal 'utf-8 bfcs-type))
+                        buffer-file-coding-system
+                      sym))
                 (message "Warning: unknown coding system \"%s\"" match)
                 nil))
           ;; Files without an encoding tag should be UTF-8. But users
@@ -2506,7 +2516,8 @@ This function is intended to be added to `auto-coding-functions'."
                    (coding-system-base
                     (detect-coding-region (point-min) size t)))))
             ;; Pure ASCII always comes back as undecided.
-            (if (memq detected '(utf-8 undecided))
+            (if (memq detected
+                      '(utf-8 'utf-8-with-signature 'utf-8-hfs undecided))
                 'utf-8
               (warn "File contents detected as %s.
   Consider adding an encoding attribute to the xml declaration,
```