summaryrefslogtreecommitdiff
path: root/lisp/international/utf-7.el
blob: 9bc58b121ecda01b4108cf3d65fb1e7ca6dc5d8f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
;;; utf-7.el --- utf-7 coding system

;; Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008
;;   Free Software Foundation, Inc.

;; Author: Dave Love <fx@gnu.org>
;; Keywords: i18n, mail

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.

;;; Commentary:

;; Defines a coding system for UTF-7, defined in RFC 2152.  Non-ASCII
;; segments are encoded as base64-encoded big endian UTF-16.  Also
;; defines a variation required for IMAP (RFC 2060).

;; The encoding and decoding was originally taken from Jon K Hellan's
;; implementation in Gnus, but has been substantially re-done.

;; This probably needs more attention.  In particular, it's not
;; completely consistent with iconv's behaviour.  It's arguable
;; whether the IMAP version should be a coding system, since it's
;; apparently only used for IMAP mailbox names, so that coding system
;; is commented out; the IMAP conversion functions are still defined
;; below.

;;; Code:

(defun utf-7-decode (len imap)
  "Decode LEN bytes of UTF-7 at point, in place.
IMAP non-nil means use the IMAP version, whose escape character is
`&' rather than `+' and whose base64 alphabet uses `,' where the
standard one uses `/'.
Point and the buffer restriction are preserved.
Return the length of the decoded text."
  (save-excursion
    (save-restriction
      (narrow-to-region (point) (+ (point) len))
      (let ((not-esc (if imap "^&" "^+"))
            (skip-chars (if imap "A-Za-z0-9+," "A-Za-z0-9+/")))
        (while (not (eobp))
          ;; Directly represented text needs no work; jump to the next
          ;; escape character (or to the end of the region).
          (skip-chars-forward not-esc)
          (unless (eobp)
            (forward-char)
            ;; P is the start of the base64 run following the escape;
            ;; RUN-LENGTH is the number of base64 characters in it.
            (let ((p (point))
                  (run-length (skip-chars-forward skip-chars)))
              ;; An explicit `-' terminator is absorbed.
              (if (eq ?- (char-after))
                  (delete-char 1))
              (unless (= run-length 0)  ; encoded lone esc-char
                ;; UTF-7 omits base64 padding; restore it so that
                ;; `base64-decode-region' accepts the run.
                (let ((pl (mod (- run-length) 4)))
                  (insert-char ?= pl)
                  (if imap
                      (subst-char-in-region p (point) ?, ?/))
                  (base64-decode-region p (point)))
                (decode-coding-region p (point) 'utf-16be)
                ;; Drop the escape character that introduced the run.
                ;; Use `delete-char' rather than the interactive
                ;; command `delete-backward-char', whose effect
                ;; depends on user state such as Overwrite mode.
                (save-excursion
                  (goto-char p)
                  (delete-char -1)))))))
      (- (point-max) (point-min)))))

;;;###autoload
(defun utf-7-post-read-conversion (len)
  "Decode LEN bytes of standard (non-IMAP) UTF-7 at point.
Calls `utf-7-decode' with IMAP nil and returns its value."
  (utf-7-decode len nil))

;;;###autoload
(defun utf-7-imap-post-read-conversion (len)
  "Decode LEN bytes of IMAP-flavoured UTF-7 at point.
Calls `utf-7-decode' with IMAP non-nil and returns its value."
  (utf-7-decode len t))

(defun utf-7-encode (from to imap)
  "Encode the text between FROM and TO as UTF-7 in a new buffer.
FROM may also be a string, in which case that string is encoded and
TO is ignored.
IMAP non-nil means use the IMAP version (RFC 2060): the escape
character is `&' rather than `+', the base64 alphabet uses `,' where
the standard one uses `/', and every encoded run is closed with `-'.

The source text is not modified.  The result is left in a freshly
generated buffer, which is the current buffer on return.
Always return nil."
  (let* ((old-buf (current-buffer))
         (esc (if imap ?& ?+))
         ;; These are characters which can be encoded as is.
         (skip-chars (if imap
                         "\t\n\r\x20-\x25\x27-\x7e" ; rfc2060
                       ;; This includes the rfc2152 optional set.
                       ;; Perhaps it shouldn't (like iconv).
                       "\t\n\r -*,-[]-}"))
         ;; Everything that must go into a base64 run: neither directly
         ;; representable nor the escape character itself.
         (not-skip-chars (format "^%s%c" skip-chars esc)))
    (set-buffer (generate-new-buffer " *temp*"))
    (if (stringp from)
        (insert from)
      (insert-buffer-substring old-buf from to))
    (goto-char (point-min))
    (while (not (eobp))
      (skip-chars-forward skip-chars)
      ;; A literal escape character is written as ESC followed by `-'.
      ;; Compare against ESC, not a hard-coded `+': in the IMAP
      ;; version the escape is `&', and failing to consume it here
      ;; would leave point in front of it forever.
      (if (eq esc (char-after))
          (progn (forward-char)
                 (insert ?-))
        (unless (eobp)
          (insert esc)
          (let ((p (point)))
            (skip-chars-forward not-skip-chars)
            (save-restriction
              ;; encode-coding-region doesn't preserve point
              (narrow-to-region p (point))
              (encode-coding-region p (point-max) 'utf-16be)
              ;; Suppress line breaks: the trimming below stops at the
              ;; first newline and would discard the rest of a long run.
              (base64-encode-region p (point-max) t)
              (if imap
                  (subst-char-in-region p (point-max) ?/ ?,))
              (goto-char p)
              ;; As I read the RFC, this isn't correct, but it's
              ;; consistent with iconv, at least regarding `='.
              (skip-chars-forward "^= \t\n")
              (delete-region (point) (point-max))))
          ;; RFC2060 stipulates that all names MUST end in US-ASCII (i.e.
          ;; a name that ends with a Unicode octet MUST end with a "-").
          (if (or imap (not (eobp)))
              (insert ?-)))))
    nil))

;;;###autoload
(defun utf-7-pre-write-conversion (from to)
  "Encode the text between FROM and TO as standard (non-IMAP) UTF-7.
Calls `utf-7-encode' with IMAP nil and returns its value."
  (utf-7-encode from to nil))

;;;###autoload
(defun utf-7-imap-pre-write-conversion (from to)
  "Encode the text between FROM and TO as IMAP-flavoured UTF-7.
Calls `utf-7-encode' with IMAP non-nil and returns its value."
  (utf-7-encode from to t))

(provide 'utf-7)

;; arch-tag: 975ee403-90a4-4286-97d2-4ed1323f4ef9
;;; utf-7.el ends here