summaryrefslogtreecommitdiff
path: root/lib/pstore.rb
blob: 8032c8d52548efb1ed28ea5d25419079097de48f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
# = PStore -- Transactional File Storage for Ruby Objects
#
# pstore.rb -
#   originally by matz
#   documentation by Kev Jackson and James Edward Gray II
#   improved by Hongli Lai
#
# See PStore for documentation.


require "fileutils"
require "digest/md5"
require "thread"

#
# PStore implements a file based persistence mechanism based on a Hash.  User
# code can store hierarchies of Ruby objects (values) into the data store file
# by name (keys).  An object hierarchy may be just a single object.  User code 
# may later read values back from the data store or even update data, as needed.
# 
# The transactional behavior ensures that any changes succeed or fail together.
# This can be used to ensure that the data store is not left in a transitory
# state, where some values were updated but others were not.
# 
# Behind the scenes, Ruby objects are stored to the data store file with 
# Marshal.  That carries the usual limitations.  Proc objects cannot be 
# marshalled, for example.
#
# == Usage example:
# 
#  require "pstore"
#  
#  # a mock wiki object...
#  class WikiPage
#    def initialize( page_name, author, contents )
#      @page_name = page_name
#      @revisions = Array.new
#      
#      add_revision(author, contents)
#    end
#    
#    attr_reader :page_name
#    
#    def add_revision( author, contents )
#      @revisions << { :created  => Time.now,
#                      :author   => author,
#                      :contents => contents }
#    end
#    
#    def wiki_page_references
#      [@page_name] + @revisions.last[:contents].scan(/\b(?:[A-Z]+[a-z]+){2,}/)
#    end
#    
#    # ...
#  end
#  
#  # create a new page...
#  home_page = WikiPage.new( "HomePage", "James Edward Gray II",
#                            "A page about the JoysOfDocumentation..." )
#  
#  # then we want to update page data and the index together, or not at all...
#  wiki = PStore.new("wiki_pages.pstore")
#  wiki.transaction do  # begin transaction; do all of this or none of it
#    # store page...
#    wiki[home_page.page_name] = home_page
#    # ensure that an index has been created...
#    wiki[:wiki_index] ||= Array.new
#    # update wiki index...
#    wiki[:wiki_index].push(*home_page.wiki_page_references)
#  end                   # commit changes to wiki data store file
#  
#  ### Some time later... ###
#  
#  # read wiki data...
#  wiki.transaction(true) do  # begin read-only transaction, no changes allowed
#    wiki.roots.each do |data_root_name|
#      p data_root_name
#      p wiki[data_root_name]
#    end
#  end
#
# == Transaction modes
#
# By default, file integrity is only ensured as long as the operating system
# (and the underlying hardware) doesn't raise any unexpected I/O errors. If an
# I/O error occurs while PStore is writing to its file, then the file will
# become corrupted.
#
# You can prevent this by setting <em>pstore.ultra_safe = true</em>.
# However, this results in a minor performance loss, and only works on platforms
# that support atomic file renames. Please consult the documentation for
# +ultra_safe+ for details.
#
# Needless to say, if you're storing valuable data with PStore, then you should
# backup the PStore files from time to time.
class PStore
  binmode = defined?(File::BINARY) ? File::BINARY : 0
  RDWR_ACCESS = File::RDWR | File::CREAT | binmode
  RD_ACCESS = File::RDONLY | binmode
  WR_ACCESS = File::WRONLY | File::CREAT | File::TRUNC | binmode

  # The error type thrown by all PStore methods.
  class Error < StandardError
  end
  
  # Whether PStore should do its best to prevent file corruptions, even when under
  # unlikely-to-occur error conditions such as out-of-space conditions and other
  # unusual OS filesystem errors. Setting this flag comes at the price in the form
  # of a performance loss.
  #
  # This flag only has effect on platforms on which file renames are atomic (e.g.
  # all POSIX platforms: Linux, MacOS X, FreeBSD, etc). The default value is false.
  attr_accessor :ultra_safe

  # 
  # To construct a PStore object, pass in the _file_ path where you would like 
  # the data to be stored.
  #
  # PStore objects are always reentrant. But if _thread_safe_ is set to true,
  # then it will become thread-safe at the cost of a minor performance hit.
  # 
  def initialize(file, thread_safe = false)
    dir = File::dirname(file)
    unless File::directory? dir
      raise PStore::Error, format("directory %s does not exist", dir)
    end
    if File::exist? file and not File::readable? file
      raise PStore::Error, format("file %s not readable", file)
    end
    @transaction = false
    @filename = file
    @abort = false
    @ultra_safe = false
    if @thread_safe
      @lock = Mutex.new
    else
      @lock = DummyMutex.new
    end
  end

  # Raises PStore::Error if the calling code is not in a PStore#transaction.
  def in_transaction
    raise PStore::Error, "not in transaction" unless @transaction
  end
  # 
  # Raises PStore::Error if the calling code is not in a PStore#transaction or
  # if the code is in a read-only PStore#transaction.
  # 
  def in_transaction_wr()
    in_transaction()
    raise PStore::Error, "in read-only transaction" if @rdonly
  end
  private :in_transaction, :in_transaction_wr

  #
  # Retrieves a value from the PStore file data, by _name_.  The hierarchy of 
  # Ruby objects stored under that root _name_ will be returned.
  # 
  # *WARNING*:  This method is only valid in a PStore#transaction.  It will
  # raise PStore::Error if called at any other time.
  #
  def [](name)
    in_transaction
    @table[name]
  end
  #
  # This method is just like PStore#[], save that you may also provide a 
  # _default_ value for the object.  In the event the specified _name_ is not 
  # found in the data store, your _default_ will be returned instead.  If you do 
  # not specify a default, PStore::Error will be raised if the object is not 
  # found.
  # 
  # *WARNING*:  This method is only valid in a PStore#transaction.  It will
  # raise PStore::Error if called at any other time.
  #
  def fetch(name, default=PStore::Error)
    in_transaction
    unless @table.key? name
      if default == PStore::Error
        raise PStore::Error, format("undefined root name `%s'", name)
      else
        return default
      end
    end
    @table[name]
  end
  #
  # Stores an individual Ruby object or a hierarchy of Ruby objects in the data
  # store file under the root _name_.  Assigning to a _name_ already in the data
  # store clobbers the old data.
  # 
  # == Example:
  # 
  #  require "pstore"
  #  
  #  store = PStore.new("data_file.pstore")
  #  store.transaction do  # begin transaction
  #    # load some data into the store...
  #    store[:single_object] = "My data..."
  #    store[:obj_heirarchy] = { "Kev Jackson" => ["rational.rb", "pstore.rb"],
  #                              "James Gray"  => ["erb.rb", "pstore.rb"] }
  #  end                   # commit changes to data store file
  # 
  # *WARNING*:  This method is only valid in a PStore#transaction and it cannot
  # be read-only.  It will raise PStore::Error if called at any other time.
  #
  def []=(name, value)
    in_transaction_wr()
    @table[name] = value
  end
  #
  # Removes an object hierarchy from the data store, by _name_.
  # 
  # *WARNING*:  This method is only valid in a PStore#transaction and it cannot
  # be read-only.  It will raise PStore::Error if called at any other time.
  #
  def delete(name)
    in_transaction_wr()
    @table.delete name
  end

  #
  # Returns the names of all object hierarchies currently in the store.
  # 
  # *WARNING*:  This method is only valid in a PStore#transaction.  It will
  # raise PStore::Error if called at any other time.
  #
  def roots
    in_transaction
    @table.keys
  end
  #
  # Returns true if the supplied _name_ is currently in the data store.
  # 
  # *WARNING*:  This method is only valid in a PStore#transaction.  It will
  # raise PStore::Error if called at any other time.
  #
  def root?(name)
    in_transaction
    @table.key? name
  end
  # Returns the path to the data store file.
  def path
    @filename
  end

  #
  # Ends the current PStore#transaction, committing any changes to the data
  # store immediately.
  # 
  # == Example:
  # 
  #  require "pstore"
  #   
  #  store = PStore.new("data_file.pstore")
  #  store.transaction do  # begin transaction
  #    # load some data into the store...
  #    store[:one] = 1
  #    store[:two] = 2
  #  
  #    store.commit        # end transaction here, committing changes
  #  
  #    store[:three] = 3   # this change is never reached
  #  end
  # 
  # *WARNING*:  This method is only valid in a PStore#transaction.  It will
  # raise PStore::Error if called at any other time.
  #
  def commit
    in_transaction
    @abort = false
    throw :pstore_abort_transaction
  end
  #
  # Ends the current PStore#transaction, discarding any changes to the data
  # store.
  # 
  # == Example:
  # 
  #  require "pstore"
  #   
  #  store = PStore.new("data_file.pstore")
  #  store.transaction do  # begin transaction
  #    store[:one] = 1     # this change is not applied, see below...
  #    store[:two] = 2     # this change is not applied, see below...
  #  
  #    store.abort         # end transaction here, discard all changes
  #  
  #    store[:three] = 3   # this change is never reached
  #  end
  # 
  # *WARNING*:  This method is only valid in a PStore#transaction.  It will
  # raise PStore::Error if called at any other time.
  #
  def abort
    in_transaction
    @abort = true
    throw :pstore_abort_transaction
  end

  #
  # Opens a new transaction for the data store.  Code executed inside a block
  # passed to this method may read and write data to and from the data store 
  # file.
  # 
  # At the end of the block, changes are committed to the data store
  # automatically.  You may exit the transaction early with a call to either 
  # PStore#commit or PStore#abort.  See those methods for details about how
  # changes are handled.  Raising an uncaught Exception in the block is 
  # equivalent to calling PStore#abort.
  # 
  # If _read_only_ is set to +true+, you will only be allowed to read from the
  # data store during the transaction and any attempts to change the data will
  # raise a PStore::Error.
  # 
  # Note that PStore does not support nested transactions.
  #
  def transaction(read_only = false, &block)  # :yields:  pstore
    value = nil
    raise PStore::Error, "nested transaction" if @transaction
    @lock.synchronize do
      @rdonly = read_only
      @transaction = true
      @abort = false
      file = open_and_lock_file(@filename, read_only)
      if file
        begin
          @table, checksum, original_data_size = load_data(file, read_only)
          
          catch(:pstore_abort_transaction) do
            value = yield(self)
          end
          
          if !@abort && !read_only
            save_data(checksum, original_data_size, file)
          end
        ensure
          file.close if !file.closed?
        end
      else
        # This can only occur if read_only == true.
        @table = {}
        catch(:pstore_abort_transaction) do
          value = yield(self)
        end
      end
    end
  ensure
    @transaction = false
    value
  end
  
  private
  # Constant for relieving Ruby's garbage collector.
  EMPTY_STRING = ""
  EMPTY_MARSHAL_DATA = Marshal.dump({})
  EMPTY_MARSHAL_CHECKSUM = Digest::MD5.digest(EMPTY_MARSHAL_DATA)
  
  class DummyMutex
    def synchronize
      yield
    end
  end
  
  #
  # Open the specified filename (either in read-only mode or in
  # read-write mode) and lock it for reading or writing.
  #
  # The opened File object will be returned. If _read_only_ is true,
  # and the file does not exist, then nil will be returned.
  #
  # All exceptions are propagated.
  #
  def open_and_lock_file(filename, read_only)
    if read_only
      begin
        file = File.new(filename, RD_ACCESS)
        begin
          file.flock(File::LOCK_SH)
          return file
        rescue
          file.close
          raise
        end
      rescue Errno::ENOENT
        return nil
      end
    else
      file = File.new(filename, RDWR_ACCESS)
      file.flock(File::LOCK_EX)
      return file
    end
  end
  
  # Load the given PStore file.
  # If +read_only+ is true, the unmarshalled Hash will be returned.
  # If +read_only+ is false, a 3-tuple will be returned: the unmarshalled
  # Hash, an MD5 checksum of the data, and the size of the data.
  def load_data(file, read_only)
    if read_only
      begin
        table = load(file)
        if !table.is_a?(Hash)
          raise Error, "PStore file seems to be corrupted."
        end
      rescue EOFError
        # This seems to be a newly-created file.
        table = {}
      end
      table
    else
      data = file.read
      if data.empty?
        # This seems to be a newly-created file.
        table = {}
        checksum = empty_marshal_checksum
        size = empty_marshal_data.size
      else
        table = load(data)
        checksum = Digest::MD5.digest(data)
        size = data.size
        if !table.is_a?(Hash)
          raise Error, "PStore file seems to be corrupted."
        end
      end
      data.replace(EMPTY_STRING)
      [table, checksum, size]
    end
  end
  
  def on_windows?
    is_windows = RUBY_PLATFORM =~ /mswin/  ||
                 RUBY_PLATFORM =~ /mingw/  ||
                 RUBY_PLATFORM =~ /bbcwin/ ||
                 RUBY_PLATFORM =~ /wince/
    self.class.__send__(:define_method, :on_windows?) do
      is_windows
    end
    is_windows
  end
  
  # Check whether Marshal.dump supports the 'canonical' option. This option
  # makes sure that Marshal.dump always dumps data structures in the same order.
  # This is important because otherwise, the checksums that we generate may differ.
  def marshal_dump_supports_canonical_option?
    begin
      Marshal.dump(nil, -1, true)
      result = true
    rescue
      result = false
    end
    self.class.__send__(:define_method, :marshal_dump_supports_canonical_option?) do
      result
    end
    result
  end
  
  def save_data(original_checksum, original_file_size, file)
    # We only want to save the new data if the size or checksum has changed.
    # This results in less filesystem calls, which is good for performance.
    if marshal_dump_supports_canonical_option?
      new_data = Marshal.dump(@table, -1, true)
    else
      new_data = dump(@table)
    end
    new_checksum = Digest::MD5.digest(new_data)
    
    if new_data.size != original_file_size || new_checksum != original_checksum
      if @ultra_safe && !on_windows?
        # Windows doesn't support atomic file renames.
        save_data_with_atomic_file_rename_strategy(new_data, file)
      else
        save_data_with_fast_strategy(new_data, file)
      end
    end
    
    new_data.replace(EMPTY_STRING)
  end
  
  def save_data_with_atomic_file_rename_strategy(data, file)
    temp_filename = "#{@filename}.tmp.#{Process.pid}.#{rand 1000000}"
    temp_file = File.new(temp_filename, WR_ACCESS)
    begin
      temp_file.flock(File::LOCK_EX)
      temp_file.write(data)
      temp_file.flush
      File.rename(temp_filename, @filename)
    rescue
      File.unlink(temp_file) rescue nil
      raise
    ensure
      temp_file.close
    end
  end
  
  def save_data_with_fast_strategy(data, file)
    file.rewind
    file.truncate(0)
    file.write(data)
  end


  # This method is just a wrapped around Marshal.dump
  # to allow subclass overriding used in YAML::Store.
  def dump(table)  # :nodoc:
    Marshal::dump(table)
  end

  # This method is just a wrapped around Marshal.load.
  # to allow subclass overriding used in YAML::Store.
  def load(content)  # :nodoc:
    Marshal::load(content)
  end

  def empty_marshal_data
    EMPTY_MARSHAL_DATA
  end
  def empty_marshal_checksum
    EMPTY_MARSHAL_CHECKSUM
  end
end

# :enddoc:

if __FILE__ == $0
  db = PStore.new("/tmp/foo")
  db.transaction do
    p db.roots
    ary = db["root"] = [1,2,3,4]
    ary[1] = [1,1.5]
  end

  1000.times do
    db.transaction do
      db["root"][0] += 1
      p db["root"][0]
    end
  end

  db.transaction(true) do
    p db["root"]
  end
end