summaryrefslogtreecommitdiff
path: root/lisp/gnus/registry.el
blob: 9830fc30c9877da42869032312c9b85364b20077 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
;;; registry.el --- Track and remember data items by various fields

;; Copyright (C) 2011-2013 Free Software Foundation, Inc.

;; Author: Teodor Zlatanov <tzz@lifelogs.com>
;; Keywords: data

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.

;;; Commentary:

;; This library provides a general-purpose EIEIO-based registry
;; database with persistence, initialized with these fields:

;; version: a float, 0.1 currently (don't change it)

;; max-hard: an integer, default 5000000

;; max-soft: an integer, default 50000

;; precious: a list of symbols

;; tracked: a list of symbols

;; tracker: a hashtable tuned for 100 symbols to track (you should
;; only access this with the :lookup2-function and the
;; :lookup2+-function)

;; data: a hashtable with default size 10K and resize threshold 2.0
;; (this reflects the expected usage so override it if you know better)

;; ...plus methods to do all the work: `registry-search',
;; `registry-lookup', `registry-lookup-secondary',
;; `registry-lookup-secondary-value', `registry-insert',
;; `registry-delete', `registry-prune', `registry-size' which see

;; and with the following properties:

;; Every piece of data has a unique ID and some general-purpose fields
;; (F1=D1, F2=D2, F3=(a b c)...) expressed as an alist, e.g.

;; ((F1 D1) (F2 D2) (F3 a b c))

;; Note that whether a field has one or many pieces of data, the data
;; is always a list of values.

;; The user decides which fields are "precious", F2 for example.  At
;; PRUNE TIME (when the :prune-function is called), the registry will
;; trim any entries without the F2 field until the size is :max-soft
;; or less.  No entries with the F2 field will be removed at PRUNE
;; TIME.

;; When an entry is inserted, the registry will reject new entries
;; if they bring it over the max-hard limit, even if they have the F2
;; field.

;; The user decides which fields are "tracked", F1 for example.  Any
;; new entry is then indexed by all the tracked fields so it can be
;; quickly looked up that way.  The data is always a list (see example
;; above) and each list element is indexed.

;; Precious and tracked field names must be symbols.  All other
;; fields can be any other Emacs Lisp types.

;;; Code:

(eval-when-compile (require 'cl))

(require 'eieio)
(require 'eieio-base)

(defclass registry-db (eieio-persistent)
  ((version :initarg :version
            :initform 0.1
            :type float
            :custom float
            :documentation "The registry version.")
   (max-hard :initarg :max-hard
             :initform 5000000
             :type integer
             :custom integer
             :documentation "Never accept more than this many elements.")
   (max-soft :initarg :max-soft
             :initform 50000
             :type integer
             :custom integer
             :documentation "Prune as much as possible to get to this size.")
   (prune-factor
    :initarg :prune-factor
    :initform 0.1
    :type float
    :custom float
    :documentation "At the max-hard limit, prune size * this entries.")
   (tracked :initarg :tracked
            :initform nil
            :type t
            :documentation "The tracked (indexed) fields, a list of symbols.")
   (precious :initarg :precious
             :initform nil
             :type t
             :documentation "The precious fields, a list of symbols.")
   (tracker :initarg :tracker
            :type hash-table
            :documentation "The field tracking hashtable.")
   (data :initarg :data
         :type hash-table
         :documentation "The data hashtable.")))

(eval-and-compile
  (defmethod initialize-instance :AFTER ((this registry-db) slots)
    "Set value of data slot of THIS after initialization."
    (with-slots (data tracker) this
      (unless (member :data slots)
	(setq data
	      (make-hash-table :size 10000 :rehash-size 2.0 :test 'equal)))
      (unless (member :tracker slots)
	(setq tracker (make-hash-table :size 100 :rehash-size 2.0)))))

  (defmethod registry-lookup ((db registry-db) keys)
    "Search for KEYS in the registry-db THIS.
Returns an alist of the key followed by the entry in a list, not a cons cell."
    (let ((data (oref db :data)))
      (delq nil
	    (mapcar
	     (lambda (k)
	       (when (gethash k data)
		 (list k (gethash k data))))
	     keys))))

  (defmethod registry-lookup-breaks-before-lexbind ((db registry-db) keys)
    "Search for KEYS in the registry-db THIS.
Returns an alist of the key followed by the entry in a list, not a cons cell."
    (let ((data (oref db :data)))
      (delq nil
	    (loop for key in keys
		  when (gethash key data)
		  collect (list key (gethash key data))))))

  (defmethod registry-lookup-secondary ((db registry-db) tracksym
					&optional create)
    "Search for TRACKSYM in the registry-db THIS.
When CREATE is not nil, create the secondary index hashtable if needed."
    (let ((h (gethash tracksym (oref db :tracker))))
      (if h
	  h
	(when create
	  (puthash tracksym
		   (make-hash-table :size 800 :rehash-size 2.0 :test 'equal)
		   (oref db :tracker))
	  (gethash tracksym (oref db :tracker))))))

  (defmethod registry-lookup-secondary-value ((db registry-db) tracksym val
					      &optional set)
    "Search for TRACKSYM with value VAL in the registry-db THIS.
When SET is not nil, set it for VAL (use t for an empty list)."
    ;; either we're asked for creation or there should be an existing index
    (when (or set (registry-lookup-secondary db tracksym))
      ;; set the entry if requested,
      (when set
	(puthash val (if (eq t set) '() set)
		 (registry-lookup-secondary db tracksym t)))
      (gethash val (registry-lookup-secondary db tracksym)))))

(defun registry--match (mode entry check-list)
  ;; for all members
  (when check-list
    (let ((key (nth 0 (nth 0 check-list)))
          (vals (cdr-safe (nth 0 check-list)))
          found)
      (while (and key vals (not found))
        (setq found (case mode
                      (:member
                       (member (car-safe vals) (cdr-safe (assoc key entry))))
                      (:regex
                       (string-match (car vals)
                                     (mapconcat
                                      'prin1-to-string
                                      (cdr-safe (assoc key entry))
                                      "\0"))))
              vals (cdr-safe vals)))
      (or found
          (registry--match mode entry (cdr-safe check-list))))))

(eval-and-compile
  (defmethod registry-search ((db registry-db) &rest spec)
    "Search for SPEC across the registry-db THIS.
For example calling with :member '(a 1 2) will match entry '((a 3 1)).
Calling with :all t (any non-nil value) will match all.
Calling with :regex '\(a \"h.llo\") will match entry '((a \"hullo\" \"bye\").
The test order is to check :all first, then :member, then :regex."
    (when db
      (let ((all (plist-get spec :all))
	    (member (plist-get spec :member))
	    (regex (plist-get spec :regex)))
	(loop for k being the hash-keys of (oref db :data)
	      using (hash-values v)
	      when (or
		    ;; :all non-nil returns all
		    all
		    ;; member matching
		    (and member (registry--match :member v member))
		    ;; regex matching
		    (and regex (registry--match :regex v regex)))
	      collect k))))

  (defmethod registry-delete ((db registry-db) keys assert &rest spec)
    "Delete KEYS from the registry-db THIS.
If KEYS is nil, use SPEC to do a search.
Updates the secondary ('tracked') indices as well.
With assert non-nil, errors out if the key does not exist already."
    (let* ((data (oref db :data))
	   (keys (or keys
		     (apply 'registry-search db spec)))
	   (tracked (oref db :tracked)))

      (dolist (key keys)
	(let ((entry (gethash key data)))
	  (when assert
	    (assert entry nil
		    "Key %s does not exists in database" key))
	  ;; clean entry from the secondary indices
	  (dolist (tr tracked)
	    ;; is this tracked symbol indexed?
	    (when (registry-lookup-secondary db tr)
	      ;; for every value in the entry under that key...
	      (dolist (val (cdr-safe (assq tr entry)))
		(let* ((value-keys (registry-lookup-secondary-value
				    db tr val)))
		  (when (member key value-keys)
		    ;; override the previous value
		    (registry-lookup-secondary-value
		     db tr val
		     ;; with the indexed keys MINUS the current key
		     ;; (we pass t when the list is empty)
		     (or (delete key value-keys) t)))))))
	  (remhash key data)))
      keys))

  (defmethod registry-full ((db registry-db))
    "Checks if registry-db THIS is full."
    (>= (registry-size db)
       (oref db :max-hard)))

  (defmethod registry-insert ((db registry-db) key entry)
    "Insert ENTRY under KEY into the registry-db THIS.
Updates the secondary ('tracked') indices as well.
Errors out if the key exists already."

    (assert (not (gethash key (oref db :data))) nil
	    "Key already exists in database")

    (assert (not (registry-full db))
	    nil
	    "registry max-hard size limit reached")

    ;; store the entry
    (puthash key entry (oref db :data))

    ;; store the secondary indices
    (dolist (tr (oref db :tracked))
      ;; for every value in the entry under that key...
      (dolist (val (cdr-safe (assq tr entry)))
	(let* ((value-keys (registry-lookup-secondary-value db tr val)))
	  (pushnew key value-keys :test 'equal)
	  (registry-lookup-secondary-value db tr val value-keys))))
    entry)

  (defmethod registry-reindex ((db registry-db))
    "Rebuild the secondary indices of registry-db THIS."
    (let ((count 0)
	  (expected (* (length (oref db :tracked)) (registry-size db))))
      (dolist (tr (oref db :tracked))
	(let (values)
	  (maphash
	   (lambda (key v)
	     (incf count)
	     (when (and (< 0 expected)
			(= 0 (mod count 1000)))
	       (message "reindexing: %d of %d (%.2f%%)"
			count expected (/ (* 100 count) expected)))
	     (dolist (val (cdr-safe (assq tr v)))
	       (let* ((value-keys (registry-lookup-secondary-value db tr val)))
		 (push key value-keys)
		 (registry-lookup-secondary-value db tr val value-keys))))
	   (oref db :data))))))

  (defmethod registry-size ((db registry-db))
    "Returns the size of the registry-db object THIS.
This is the key count of the :data slot."
    (hash-table-count (oref db :data)))

  (defmethod registry-prune ((db registry-db) &optional sortfun)
    "Prunes the registry-db object THIS.
Removes only entries without the :precious keys if it can,
then removes oldest entries first.
Returns the number of deleted entries.
If SORTFUN is given, tries to keep entries that sort *higher*.
SORTFUN is passed only the two keys so it must look them up directly."
    (dolist (collector '(registry-prune-soft-candidates
                         registry-prune-hard-candidates))
      (let* ((size (registry-size db))
             (collected (funcall collector db))
             (limit (nth 0 collected))
             (candidates (nth 1 collected))
             ;; sort the candidates if SORTFUN was given
             (candidates (if sortfun (sort candidates sortfun) candidates))
             (candidates-count (length candidates))
             ;; are we over max-soft?
             (prune-needed (> size limit)))

        ;; while we have more candidates than we need to remove...
        (while (and (> candidates-count (- size limit)) candidates)
          (decf candidates-count)
          (setq candidates (cdr candidates)))

        (registry-delete db candidates nil)
        (length candidates))))

  (defmethod registry-prune-soft-candidates ((db registry-db))
    "Collects pruning candidates from the registry-db object THIS.
Proposes only entries without the :precious keys."
    (let* ((precious (oref db :precious))
	   (precious-p (lambda (entry-key)
			 (cdr (memq (car entry-key) precious))))
	   (data (oref db :data))
	   (limit (oref db :max-soft))
	   (candidates (loop for k being the hash-keys of data
			     using (hash-values v)
			     when (notany precious-p v)
			     collect k)))
      (list limit candidates)))

  (defmethod registry-prune-hard-candidates ((db registry-db))
    "Collects pruning candidates from the registry-db object THIS.
Proposes any entries over the max-hard limit minus size * prune-factor."
    (let* ((data (oref db :data))
           ;; prune to (size * prune-factor) below the max-hard limit so
           ;; we're not pruning all the time
	   (limit (max 0 (- (oref db :max-hard)
                            (* (registry-size db) (oref db :prune-factor)))))
	   (candidates (loop for k being the hash-keys of data
			     collect k)))
      (list limit candidates))))

(provide 'registry)
;;; registry.el ends here