diff options
author | ShengHuo ZHU <zsh@cs.rochester.edu> | 2001-10-31 04:16:51 +0000 |
---|---|---|
committer | ShengHuo ZHU <zsh@cs.rochester.edu> | 2001-10-31 04:16:51 +0000 |
commit | 95fa1ff74aa9ae40d5ef4b680ea606287c40327f (patch) | |
tree | 900b4c445ed113bf645086ede4de094dd33c2230 /lisp/gnus/nnweb.el | |
parent | bf9bb76fe5da844622da05f1fd9aa140d8030381 (diff) | |
download | emacs-95fa1ff74aa9ae40d5ef4b680ea606287c40327f.tar.gz |
* mm-util.el, nnultimate.el, nnweb.el, nnslashdot.el: Sync with
the Gnus CVS.
* mm-util.el (mm-mime-mule-charset-alist): Move down and call
mm-coding-system-p. Don't correct it only in XEmacs.
(mm-charset-to-coding-system): Use mm-coding-system-p and
mm-get-coding-system-list.
(mm-emacs-mule, mm-mule4-p): New.
(mm-enable-multibyte, mm-disable-multibyte,
mm-enable-multibyte-mule4, mm-disable-multibyte-mule4,
mm-with-unibyte-current-buffer,
mm-with-unibyte-current-buffer-mule4): Use them.
(mm-find-mime-charset-region): Treat iso-2022-jp.
From Dave Love <fx@gnu.org>:
* mm-util.el (mm-mime-mule-charset-alist): Make it correct by
construction.
(mm-charset-synonym-alist): Remove windows-125[02]. Make other
entries conditional on not having a coding system defined for
them.
(mm-mule-charset-to-mime-charset): Use
find-coding-systems-for-charsets if defined.
(mm-charset-to-coding-system): Don't use
mm-get-coding-system-list. Look in mm-charset-synonym-alist
later. Add last resort search of coding systems.
(mm-enable-multibyte-mule4, mm-disable-multibyte-mule4)
(mm-with-unibyte-current-buffer-mule4): Just treat Mule 5 like
Mule 4.
(mm-find-mime-charset-region): Re-write.
(mm-with-unibyte-current-buffer): Restore buffer as well as
multibyteness.
Diffstat (limited to 'lisp/gnus/nnweb.el')
-rw-r--r-- | lisp/gnus/nnweb.el | 236 |
1 files changed, 203 insertions, 33 deletions
diff --git a/lisp/gnus/nnweb.el b/lisp/gnus/nnweb.el index c4ff7248e6b..740b182639f 100644 --- a/lisp/gnus/nnweb.el +++ b/lisp/gnus/nnweb.el @@ -1,5 +1,5 @@ ;;; nnweb.el --- retrieving articles via web search engines -;; Copyright (C) 1996, 1997, 1998, 1999, 2000 +;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001 ;; Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> @@ -55,25 +55,48 @@ (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/") "Where nnweb will save its files.") -(defvoo nnweb-type 'dejanews +(defvoo nnweb-type 'google "What search engine type is being used. -Valid types include `dejanews', `dejanewsold', `reference', +Valid types include `google', `dejanews', `dejanewsold', `reference', and `altavista'.") (defvar nnweb-type-definition - '((dejanews + '( + (google + ;;(article . nnweb-google-wash-article) + ;;(id . "http://groups.google.com/groups?as_umsgid=%s") (article . ignore) - (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") - (map . nnweb-dejanews-create-mapping) - (search . nnweb-dejanews-search) - (address . "http://www.deja.com/=dnc/qs.xp") - (identifier . nnweb-dejanews-identity)) - (dejanewsold + (id . "http://groups.google.com/groups?selm=%s&output=gplain") + ;;(reference . nnweb-google-reference) + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.com/groups") + (identifier . nnweb-google-identity)) + (dejanews ;; alias of google + ;;(article . nnweb-google-wash-article) + ;;(id . "http://groups.google.com/groups?as_umsgid=%s") (article . ignore) - (map . nnweb-dejanews-create-mapping) - (search . nnweb-dejanewsold-search) - (address . "http://www.deja.com/dnquery.xp") - (identifier . nnweb-dejanews-identity)) + (id . "http://groups.google.com/groups?selm=%s&output=gplain") + ;;(reference . nnweb-google-reference) + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.com/groups") + (identifier . nnweb-google-identity)) +;;; (dejanews +;;; (article . ignore) +;;; (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") +;;; (map . nnweb-dejanews-create-mapping) +;;; (search . nnweb-dejanews-search) +;;; (address . "http://www.deja.com/=dnc/qs.xp") +;;; (identifier . nnweb-dejanews-identity)) +;;; (dejanewsold +;;; (article . ignore) +;;; (map . nnweb-dejanews-create-mapping) +;;; (search . nnweb-dejanewsold-search) +;;; (address . "http://www.deja.com/dnquery.xp") +;;; (identifier . nnweb-dejanews-identity)) (reference (article . nnweb-reference-wash-article) (map . nnweb-reference-create-mapping) @@ -124,6 +147,8 @@ and `altavista'.") (deffoo nnweb-request-scan (&optional group server) (nnweb-possibly-change-server group server) + (if nnweb-ephemeral-p + (setq nnweb-hashtb (gnus-make-hashtable 4095))) (funcall (nnweb-definition 'map)) (unless nnweb-ephemeral-p (nnweb-write-active) @@ -134,9 +159,10 @@ and `altavista'.") (when (and group (not (equal group nnweb-group)) (not nnweb-ephemeral-p)) + (setq nnweb-group group + nnweb-articles nil) (let ((info (assoc group nnweb-group-alist))) (when info - (setq nnweb-group group) (setq nnweb-type (nth 2 info)) (setq nnweb-search (nth 3 info)) (unless dont-check @@ -175,17 +201,19 @@ and `altavista'.") (and (stringp article) (nnweb-definition 'id t) (let ((fetch (nnweb-definition 'id)) - art) + art active) (when (string-match "^<\\(.*\\)>$" article) (setq art (match-string 1 article))) - (and fetch - art - (mm-with-unibyte-current-buffer - (nnweb-fetch-url - (format fetch article))))))) + (when (and fetch art) + (setq url (format fetch art)) + (mm-with-unibyte-current-buffer + (nnweb-fetch-url url)) + (if (nnweb-definition 'reference t) + (setq article + (funcall (nnweb-definition + 'reference) article))))))) (unless nnheader-callback-function - (funcall (nnweb-definition 'article)) - (nnweb-decode-entities)) + (funcall (nnweb-definition 'article))) (nnheader-report 'nnweb "Fetched article %s" article) (cons group (and (numberp article) article)))))) @@ -290,10 +318,11 @@ and `altavista'.") (nnweb-open-server server))) (unless nnweb-group-alist (nnweb-read-active)) + (unless nnweb-hashtb + (setq nnweb-hashtb (gnus-make-hashtable 4095))) (when group (when (and (not nnweb-ephemeral-p) - (not (equal group nnweb-group))) - (setq nnweb-hashtb (gnus-make-hashtable 4095)) + (equal group nnweb-group)) (nnweb-request-group group nil t)))) (defun nnweb-init (server) @@ -393,7 +422,7 @@ and `altavista'.") (car (rassq (string-to-number (match-string 2 date)) parse-time-months)) - (match-string 3 date) + (match-string 3 date) (match-string 1 date))) (setq date "Jan 1 00:00:00 0000")) (incf i) @@ -559,6 +588,7 @@ and `altavista'.") (while (search-forward "," nil t) (replace-match " " t t))) (widen) + (nnweb-decode-entities) (set-marker body nil)))) (defun nnweb-reference-search (search) @@ -663,7 +693,8 @@ and `altavista'.") (while (re-search-forward "<A.*\\?id@\\([^\"]+\\)\">[0-9]+</A>" nil t) (replace-match "<\\1> " t))) (widen) - (nnweb-remove-markup))) + (nnweb-remove-markup) + (nnweb-decode-entities))) (defun nnweb-altavista-search (search &optional part) (url-insert-file-contents @@ -683,13 +714,147 @@ and `altavista'.") t) ;;; +;;; Deja bought by google.com +;;; + +(defun nnweb-google-wash-article () + (let ((case-fold-search t) url) + (goto-char (point-min)) + (re-search-forward "^<pre>" nil t) + (narrow-to-region (point-min) (point)) + (search-backward "<table " nil t 2) + (delete-region (point-min) (point)) + (if (re-search-forward "Search Result [0-9]+" nil t) + (replace-match "")) + (if (re-search-forward "View complete thread ([0-9]+ articles?)" nil t) + (replace-match "")) + (goto-char (point-min)) + (while (search-forward "<br>" nil t) + (replace-match "\n")) + (nnweb-remove-markup) + (goto-char (point-min)) + (while (re-search-forward "^[ \t]*\n" nil t) + (replace-match "")) + (goto-char (point-max)) + (insert "\n") + (widen) + (narrow-to-region (point) (point-max)) + (search-forward "</pre>" nil t) + (delete-region (point) (point-max)) + (nnweb-remove-markup) + (widen))) + +(defun nnweb-google-parse-1 (&optional Message-ID) + (let ((i 0) + (case-fold-search t) + (active (cadr (assoc nnweb-group nnweb-group-alist))) + Subject Score Date Newsgroups From + map url mid) + (unless active + (push (list nnweb-group (setq active (cons 1 0)) + nnweb-type nnweb-search) + nnweb-group-alist)) + ;; Go through all the article hits on this page. + (goto-char (point-min)) + (while (re-search-forward + "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t) + (setq mid (match-string 2) + url (format + "http://groups.google.com/groups?selm=%s&output=gplain" mid)) + (narrow-to-region (search-forward ">" nil t) + (search-forward "</a>" nil t)) + (nnweb-remove-markup) + (nnweb-decode-entities) + (setq Subject (buffer-string)) + (goto-char (point-max)) + (widen) + (forward-line 1) + (when (looking-at "<br><font[^>]+>") + (goto-char (match-end 0))) + (if (not (looking-at "<a[^>]+>")) + (skip-chars-forward " \t") + (narrow-to-region (point) + (search-forward "</a>" nil t)) + (nnweb-remove-markup) + (nnweb-decode-entities) + (setq Newsgroups (buffer-string)) + (goto-char (point-max)) + (widen) + (skip-chars-forward "- \t")) + (when (looking-at + "\\([0-9]+[/ ][A-Za-z]+[/ ][0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - <a") + (setq From (match-string 2) + Date (match-string 1))) + (forward-line 1) + (incf i) + (unless (nnweb-get-hashtb url) + (push + (list + (incf (cdr active)) + (make-full-mail-header + (cdr active) (if Newsgroups + (concat "(" Newsgroups ") " Subject) + Subject) + From Date (or Message-ID mid) + nil 0 0 url)) + map) + (nnweb-set-hashtb (cadar map) (car map)))) + map)) + +(defun nnweb-google-reference (id) + (let ((map (nnweb-google-parse-1 id)) header) + (setq nnweb-articles + (nconc nnweb-articles map)) + (when (setq header (cadar map)) + (mm-with-unibyte-current-buffer + (nnweb-fetch-url (mail-header-xref header))) + (caar map)))) + +(defun nnweb-google-create-mapping () + "Perform the search and create an number-to-url alist." + (save-excursion + (set-buffer nnweb-buffer) + (erase-buffer) + (when (funcall (nnweb-definition 'search) nnweb-search) + (let ((more t)) + (while more + (setq nnweb-articles + (nconc nnweb-articles (nnweb-google-parse-1))) + ;; FIXME: There is more. + (setq more nil)) + ;; Return the articles in the right order. + (setq nnweb-articles + (sort nnweb-articles 'car-less-than-car)))))) + +(defun nnweb-google-search (search) + (nnweb-insert + (concat + (nnweb-definition 'address) + "?" + (nnweb-encode-www-form-urlencoded + `(("q" . ,search) + ("num". "100") + ("hq" . "") + ("hl" . "") + ("lr" . "") + ("safe" . "off") + ("sites" . "groups"))))) + t) + +(defun nnweb-google-identity (url) + "Return an unique identifier based on URL." + (if (string-match "selm=\\([^ &>]+\\)" url) + (match-string 1 url) + url)) + +;;; ;;; General web/w3 interface utility functions ;;; (defun nnweb-insert-html (parse) "Insert HTML based on a w3 parse tree." (if (stringp parse) - (insert parse) + (insert (nnheader-string-as-multibyte parse)) (insert "<" (symbol-name (car parse)) " ") (insert (mapconcat (lambda (param) @@ -729,7 +894,7 @@ and `altavista'.") (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t) (let ((elem (if (eq (aref (match-string 1) 0) ?\#) (let ((c - (string-to-number (substring + (string-to-number (substring (match-string 1) 1)))) (if (mm-char-or-char-int-p c) c 32)) (or (cdr (assq (intern (match-string 1)) @@ -739,9 +904,9 @@ and `altavista'.") (setq elem (char-to-string elem))) (replace-match elem t t)))) -(defun nnweb-decode-entities-string (str) +(defun nnweb-decode-entities-string (string) (with-temp-buffer - (insert str) + (insert string) (nnweb-decode-entities) (buffer-substring (point-min) (point-max)))) @@ -760,12 +925,12 @@ and `altavista'.") "Insert the contents from an URL in the current buffer. If FOLLOW-REFRESH is non-nil, redirect refresh url in META." (let ((name buffer-file-name)) - (if follow-refresh + (if follow-refresh (save-restriction (narrow-to-region (point) (point)) (url-insert-file-contents url) (goto-char (point-min)) - (when (re-search-forward + (when (re-search-forward "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t) (let ((url (match-string 1))) (delete-region (point-min) (point-max)) @@ -822,6 +987,11 @@ If FOLLOW-REFRESH is non-nil, redirect refresh url in META." (listp (cdr element))) (nnweb-text-1 element))))) +(defun nnweb-replace-in-string (string match newtext) + (while (string-match match string) + (setq string (replace-match newtext t t string))) + string) + (provide 'nnweb) ;;; nnweb.el ends here |