summaryrefslogtreecommitdiff
path: root/lisp/mail/rfc2231.el
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/mail/rfc2231.el')
-rw-r--r--lisp/mail/rfc2231.el308
1 files changed, 308 insertions, 0 deletions
diff --git a/lisp/mail/rfc2231.el b/lisp/mail/rfc2231.el
new file mode 100644
index 00000000000..128779ab4c6
--- /dev/null
+++ b/lisp/mail/rfc2231.el
@@ -0,0 +1,308 @@
+;;; rfc2231.el --- Functions for decoding rfc2231 headers
+
+;; Copyright (C) 1998-2016 Free Software Foundation, Inc.
+
+;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;;; Code:
+
+(eval-when-compile (require 'cl))
+(require 'ietf-drums)
+(require 'rfc2047)
+(autoload 'mm-encode-body "mm-bodies")
+(autoload 'mail-header-remove-whitespace "mail-parse")
+(autoload 'mail-header-remove-comments "mail-parse")
+
+(defun rfc2231-get-value (ct attribute)
+ "Return the value of ATTRIBUTE from CT."
+ (cdr (assq attribute (cdr ct))))
+
+(defun rfc2231-parse-qp-string (string)
+ "Parse QP-encoded string using `rfc2231-parse-string'.
+N.B. This is in violation with RFC2047, but it seem to be in common use."
+ (rfc2231-parse-string (rfc2047-decode-string string)))
+
+(defun rfc2231-parse-string (string &optional signal-error)
+ "Parse STRING and return a list.
+The list will be on the form
+ `(name (attribute . value) (attribute . value)...)'.
+
+If the optional SIGNAL-ERROR is non-nil, signal an error when this
+function fails in parsing of parameters. Otherwise, this function
+must never cause a Lisp error."
+ (with-temp-buffer
+ (let ((ttoken (ietf-drums-token-to-list ietf-drums-text-token))
+ (stoken (ietf-drums-token-to-list ietf-drums-tspecials))
+ (ntoken (ietf-drums-token-to-list "0-9"))
+ c type attribute encoded number parameters value)
+ (ietf-drums-init
+ (condition-case nil
+ (mail-header-remove-whitespace
+ (mail-header-remove-comments string))
+ ;; The most likely cause of an error is unbalanced parentheses
+ ;; or double-quotes. If all parentheses and double-quotes are
+ ;; quoted meaninglessly with backslashes, removing them might
+ ;; make it parsable. Let's try...
+ (error
+ (let (mod)
+ (when (and (string-match "\\\\\"" string)
+ (not (string-match "\\`\"\\|[^\\]\"" string)))
+ (setq string (replace-regexp-in-string "\\\\\"" "\"" string)
+ mod t))
+ (when (and (string-match "\\\\(" string)
+ (string-match "\\\\)" string)
+ (not (string-match "\\`(\\|[^\\][()]" string)))
+ (setq string (replace-regexp-in-string
+ "\\\\\\([()]\\)" "\\1" string)
+ mod t))
+ (or (and mod
+ (ignore-errors
+ (mail-header-remove-whitespace
+ (mail-header-remove-comments string))))
+ ;; Finally, attempt to extract only type.
+ (if (string-match
+ (concat "\\`[\t\n ]*\\([^" ietf-drums-tspecials "\t\n ]+"
+ "\\(?:/[^" ietf-drums-tspecials
+ "\t\n ]+\\)?\\)\\(?:[\t\n ;]\\|\\'\\)")
+ string)
+ (match-string 1 string)
+ ""))))))
+ (let ((table (copy-syntax-table ietf-drums-syntax-table)))
+ (modify-syntax-entry ?\' "w" table)
+ (modify-syntax-entry ?* " " table)
+ (modify-syntax-entry ?\; " " table)
+ (modify-syntax-entry ?= " " table)
+ ;; The following isn't valid, but one should be liberal
+ ;; in what one receives.
+ (modify-syntax-entry ?\: "w" table)
+ (set-syntax-table table))
+ (setq c (char-after))
+ (when (and (memq c ttoken)
+ (not (memq c stoken))
+ (setq type (ignore-errors
+ (downcase
+ (buffer-substring (point) (progn
+ (forward-sexp 1)
+ (point)))))))
+ ;; Do the params
+ (condition-case err
+ (progn
+ (while (not (eobp))
+ (setq c (char-after))
+ (unless (eq c ?\;)
+ (error "Invalid header: %s" string))
+ (forward-char 1)
+ ;; If c in nil, then this is an invalid header, but
+ ;; since elm generates invalid headers on this form,
+ ;; we allow it.
+ (when (setq c (char-after))
+ (if (and (memq c ttoken)
+ (not (memq c stoken)))
+ (setq attribute
+ (intern
+ (downcase
+ (buffer-substring
+ (point) (progn (forward-sexp 1) (point))))))
+ (error "Invalid header: %s" string))
+ (setq c (char-after))
+ (if (eq c ?*)
+ (progn
+ (forward-char 1)
+ (setq c (char-after))
+ (if (not (memq c ntoken))
+ (setq encoded t
+ number nil)
+ (setq number
+ (string-to-number
+ (buffer-substring
+ (point) (progn (forward-sexp 1) (point)))))
+ (setq c (char-after))
+ (when (eq c ?*)
+ (setq encoded t)
+ (forward-char 1)
+ (setq c (char-after)))))
+ (setq number nil
+ encoded nil))
+ (unless (eq c ?=)
+ (error "Invalid header: %s" string))
+ (forward-char 1)
+ (setq c (char-after))
+ (cond
+ ((eq c ?\")
+ (setq value (buffer-substring (1+ (point))
+ (progn
+ (forward-sexp 1)
+ (1- (point)))))
+ (when encoded
+ (setq value (mapconcat (lambda (c) (format "%%%02x" c))
+ value ""))))
+ ((and (or (memq c ttoken)
+ ;; EXTENSION: Support non-ascii chars.
+ (> c ?\177))
+ (not (memq c stoken)))
+ (setq value
+ (buffer-substring
+ (point)
+ (progn
+ ;; Jump over asterisk, non-ASCII
+ ;; and non-boundary characters.
+ (while (and c
+ (or (eq c ?*)
+ (> c ?\177)
+ (not (eq (char-syntax c) ? ))))
+ (forward-char 1)
+ (setq c (char-after)))
+ (point)))))
+ (t
+ (error "Invalid header: %s" string)))
+ (push (list attribute value number encoded)
+ parameters))))
+ (error
+ (setq parameters nil)
+ (when signal-error
+ (signal (car err) (cdr err)))))
+
+ ;; Now collect and concatenate continuation parameters.
+ (let ((cparams nil)
+ elem)
+ (loop for (attribute value part encoded)
+ in (sort parameters (lambda (e1 e2)
+ (< (or (caddr e1) 0)
+ (or (caddr e2) 0))))
+ do (cond
+ ;; First part.
+ ((or (not (setq elem (assq attribute cparams)))
+ (and (numberp part)
+ (zerop part)))
+ (push (list attribute value encoded) cparams))
+ ;; Repetition of a part; do nothing.
+ ((and elem
+ (null number))
+ )
+ ;; Concatenate continuation parts.
+ (t
+ (setcar (cdr elem) (concat (cadr elem) value)))))
+ ;; Finally decode encoded values.
+ (cons type (mapcar
+ (lambda (elem)
+ (cons (car elem)
+ (if (nth 2 elem)
+ (rfc2231-decode-encoded-string (nth 1 elem))
+ (nth 1 elem))))
+ (nreverse cparams))))))))
+
+(defun rfc2231-decode-encoded-string (string)
+ "Decode an RFC2231-encoded string.
+These look like:
+ \"us-ascii\\='en-us\\='This%20is%20%2A%2A%2Afun%2A%2A%2A\",
+ \"us-ascii\\='\\='This%20is%20%2A%2A%2Afun%2A%2A%2A\",
+ \"\\='en-us\\='This%20is%20%2A%2A%2Afun%2A%2A%2A\",
+ \"\\='\\='This%20is%20%2A%2A%2Afun%2A%2A%2A\", or
+ \"This is ***fun***\"."
+ (string-match "\\`\\(?:\\([^']+\\)?'\\([^']+\\)?'\\)?\\(.+\\)" string)
+ (let ((coding-system (mm-charset-to-coding-system
+ (match-string 1 string) nil t))
+ ;;(language (match-string 2 string))
+ (value (match-string 3 string)))
+ (mm-with-unibyte-buffer
+ (insert value)
+ (goto-char (point-min))
+ (while (re-search-forward "%\\([0-9A-Fa-f][0-9A-Fa-f]\\)" nil t)
+ (insert
+ (prog1
+ (string-to-number (match-string 1) 16)
+ (delete-region (match-beginning 0) (match-end 0)))))
+ ;; Decode using the charset, if any.
+ (if (memq coding-system '(nil ascii))
+ (buffer-string)
+ (decode-coding-string (buffer-string) coding-system)))))
+
+(defun rfc2231-encode-string (param value)
+ "Return and PARAM=VALUE string encoded according to RFC2231.
+Use `mml-insert-parameter' or `mml-insert-parameter-string' to insert
+the result of this function."
+ (let ((control (ietf-drums-token-to-list ietf-drums-no-ws-ctl-token))
+ (tspecial (ietf-drums-token-to-list ietf-drums-tspecials))
+ (special (ietf-drums-token-to-list "*'%\n\t"))
+ (ascii (ietf-drums-token-to-list ietf-drums-text-token))
+ (num -1)
+ ;; Don't make lines exceeding 76 column.
+ (limit (- 74 (length param)))
+ spacep encodep charsetp charset broken)
+ (mm-with-multibyte-buffer
+ (insert value)
+ (goto-char (point-min))
+ (while (not (eobp))
+ (cond
+ ((or (memq (following-char) control)
+ (memq (following-char) tspecial)
+ (memq (following-char) special))
+ (setq encodep t))
+ ((eq (following-char) ? )
+ (setq spacep t))
+ ((not (memq (following-char) ascii))
+ (setq charsetp t)))
+ (forward-char 1))
+ (when charsetp
+ (setq charset (mm-encode-body)))
+ (mm-disable-multibyte)
+ (cond
+ ((or encodep charsetp
+ (progn
+ (end-of-line)
+ (> (current-column) (if spacep (- limit 2) limit))))
+ (setq limit (- limit 6))
+ (goto-char (point-min))
+ (insert (symbol-name (or charset 'us-ascii)) "''")
+ (while (not (eobp))
+ (if (or (not (memq (following-char) ascii))
+ (memq (following-char) control)
+ (memq (following-char) tspecial)
+ (memq (following-char) special)
+ (eq (following-char) ? ))
+ (progn
+ (when (>= (current-column) (1- limit))
+ (insert ";\n")
+ (setq broken t))
+ (insert "%" (format "%02x" (following-char)))
+ (delete-char 1))
+ (when (> (current-column) limit)
+ (insert ";\n")
+ (setq broken t))
+ (forward-char 1)))
+ (goto-char (point-min))
+ (if (not broken)
+ (insert param "*=")
+ (while (not (eobp))
+ (insert (if (>= num 0) " " "")
+ param "*" (format "%d" (incf num)) "*=")
+ (forward-line 1))))
+ (spacep
+ (goto-char (point-min))
+ (insert param "=\"")
+ (goto-char (point-max))
+ (insert "\""))
+ (t
+ (goto-char (point-min))
+ (insert param "=")))
+ (buffer-string))))
+
+(provide 'rfc2231)
+
+;;; rfc2231.el ends here