"""

fs.remote
=========

Utilities for interfacing with remote filesystems


This module provides reusable utility functions that can be used to construct
FS subclasses interfacing with a remote filesystem.  These include:

  * RemoteFileBuffer:  a file-like object that locally buffers the contents of
                       a remote file, writing them back on flush() or close().

  * ConnectionManagerFS:  a WrapFS subclass that tracks the connection state
                          of a remote FS, and allows client code to wait for
                          a connection to be re-established.

  * CacheFS:  a WrapFS subclass that caches file and directory meta-data in
              memory, to speed access to a remote FS.
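
These can be composed; a remote FS implementation will often wrap itself
in both, e.g. (a sketch, with MyRemoteFS standing in for a concrete
remote FS class)::

    fs = CacheFS(ConnectionManagerFS(MyRemoteFS("http://www.example.com/")))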

"""

from __future__ import with_statement

import sys
import os
import time
import copy
from errno import EINVAL

from fs.base import FS, threading
from fs.wrapfs import WrapFS, wrap_fs_methods
from fs.wrapfs.lazyfs import LazyFS
from fs.path import *
from fs.errors import *
from fs.local_functools import wraps
from fs.filelike import StringIO, SpooledTemporaryFile, FileWrapper
from fs import SEEK_SET, SEEK_CUR, SEEK_END


class RemoteFileBuffer(FileWrapper):
    """File-like object providing buffer for local file operations.

    Instances of this class manage a local tempfile buffer corresponding
    to the contents of a remote file.  All reads and writes happen locally,
    with the content being copied to the remote file only on flush() or
    close().  Writes to the remote file are performed using the setcontents()
    method on the owning FS object.

    The intended use-case is for a remote filesystem (e.g. S3FS) to return
    instances of this class from its open() method, and to provide the
    file-uploading logic in its setcontents() method, as in the following
    pseudo-code::

        def open(self,path,mode="r"):
            rf = self._get_remote_file(path)
            return RemoteFileBuffer(self,path,mode,rf)

        def setcontents(self,path,file):
            self._put_remote_file(path,file)

    The contents of the remote file are read into the buffer on-demand.
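
    As an illustration of the buffering behaviour (a usage sketch; 'myfs'
    stands for any FS whose open() returns these buffers)::

        f = myfs.open("big.dat", "r+")   # nothing is downloaded yet
        head = f.read(1024)              # fetches only the first 1KB
        f.write("patch")                 # buffered locally
        f.close()                        # uploads via myfs.setcontents()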
    """

    max_size_in_memory = 1024 * 8

    def __init__(self, fs, path, mode, rfile=None, write_on_flush=True):
        """RemoteFileBuffer constructor.

        The owning filesystem, path and mode must be provided.  If the
        optional argument 'rfile' is provided, it must be a read()-able
        object or a string containing the initial file contents.
        """
        wrapped_file = SpooledTemporaryFile(max_size=self.max_size_in_memory)
        self.fs = fs
        self.path = path
        self.write_on_flush = write_on_flush
        self._changed = False
        self._readlen = 0 # How many bytes already loaded from rfile
        self._rfile = None # Reference to remote file object 
        self._eof = False # Reached end of rfile?
        if getattr(fs,"_lock",None) is not None:
            self._lock = fs._lock.__class__()
        else:
            self._lock = threading.RLock()
        
        if "r" in mode or "+" in mode or "a" in mode:
            if rfile is None:
                # File was just created, force to write anything
                self._changed = True
                self._eof = True
                
            if not hasattr(rfile, "read"):
                rfile = StringIO(unicode(rfile))
                
            self._rfile = rfile
        else:
            # Do not use remote file object
            self._eof = True
            self._rfile = None
            if rfile is not None and hasattr(rfile,"close"):
                rfile.close()
        super(RemoteFileBuffer,self).__init__(wrapped_file,mode)
        # FIXME: What if mode with position on eof?
        if "a" in mode:
            # Seeking to the end forces the whole remote file to be
            # buffered locally before the first append.
            self.seek(0, SEEK_END)

    def __del__(self):
        #  Don't try to close a partially-constructed file
        if "_lock" in self.__dict__:
            if not self.closed:
                self.close()

    def _write(self,data,flushing=False):
        with self._lock:
            #  How far does this write extend past the data loaded so far?
            toread = len(data) - (self._readlen - self.wrapped_file.tell())
            if toread > 0:
                if not self._eof:
                    #  Pull the missing bytes from the remote file so that
                    #  _readlen stays consistent with the buffer contents.
                    #  (_fillbuffer measures from the current position, so
                    #  pass the full write length, not just the shortfall.)
                    self._fillbuffer(len(data))
                else:
                    #  Past EOF there is nothing left to read; just account
                    #  for the newly-written span.
                    self._readlen += toread
            self._changed = True
            self.wrapped_file.write(data)

    def _read_remote(self, length=None):
        """Read data from the remote file into the local buffer."""
        #  Read from the remote file in 256KB chunks.
        chunklen = 1024 * 256
        bytes_read = 0
        while True:
            toread = chunklen
            if length is not None and length - bytes_read < chunklen:
                toread = length - bytes_read
            if not toread:
                break
             
            data = self._rfile.read(toread)
            datalen = len(data)
            if not datalen:
                self._eof = True
                break
            
            bytes_read += datalen 
            self.wrapped_file.write(data)
            
            if datalen < toread:
                # We reached EOF,
                # no more reads needed
                self._eof = True
                break
        
        if self._eof and self._rfile is not None:
            self._rfile.close()
        self._readlen += bytes_read
    
    def _fillbuffer(self, length=None):  
        """Fill the local buffer, leaving file position unchanged.

        This method is used for on-demand loading of data from the remote file
        into the buffer.  It reads 'length' bytes from rfile and writes them
        into the buffer, seeking back to the original file position.
        """
        curpos = self.wrapped_file.tell()
        if length is None:
            if not self._eof:
                # Read the remainder of the remote file into the buffer.
                self.wrapped_file.seek(0, SEEK_END)
                self._read_remote()
                self._eof = True
                self.wrapped_file.seek(curpos)

        elif not self._eof:
            if curpos + length > self._readlen:
                # Load just enough extra bytes from rfile to cover the
                # requested length from the current position.
                toload = length - (self._readlen - curpos)
                self.wrapped_file.seek(0, SEEK_END)
                self._read_remote(toload)
                self.wrapped_file.seek(curpos)
        
    def _read(self, length=None):
        if length is not None and length < 0:
            length = None
        with self._lock:
            self._fillbuffer(length)
            data = self.wrapped_file.read(length if length is not None else -1)
            if not data:
                data = None
            return data

    def _seek(self,offset,whence=SEEK_SET):
        with self._lock:
            if not self._eof:
                # Compute the absolute position we are seeking to.
                if whence == SEEK_SET:
                    abspos = offset
                elif whence == SEEK_CUR:
                    abspos = offset + self.wrapped_file.tell()
                elif whence == SEEK_END:
                    abspos = None
                else:
                    raise IOError(EINVAL, 'Invalid whence')

                if abspos is not None:
                    toread = abspos - self._readlen
                    if toread > 0:
                        self.wrapped_file.seek(self._readlen)
                        self._fillbuffer(toread)
                else:
                    # Seeking relative to EOF requires the whole file.
                    self.wrapped_file.seek(self._readlen)
                    self._fillbuffer()

            self.wrapped_file.seek(offset, whence)

    def _truncate(self,size):
        with self._lock:
            if not self._eof and self._readlen < size:
                # Load remote data up to the truncation point; anything
                # beyond it is being discarded, so stop reading there.
                self._fillbuffer(size - self._readlen)
                self._eof = True
            elif self._readlen >= size:
                # Everything beyond 'size' is discarded, so the remote
                # file no longer needs to be read.
                self._readlen = size if size is not None else 0
                self._eof = True

            self.wrapped_file.truncate(size)
            self._changed = True

            self.flush()
            if self._rfile is not None:
                self._rfile.close()

    def flush(self):
        with self._lock:
            self.wrapped_file.flush()
            if self.write_on_flush:
                self._setcontents()

    def _setcontents(self):
        if not self._changed:
            # Nothing changed, no need to write data back
            return
        
        # If not all data loaded, load until eof
        if not self._eof:
            self._fillbuffer()
            
        if "w" in self.mode or "a" in self.mode or "+" in self.mode:
            pos = self.wrapped_file.tell()
            self.wrapped_file.seek(0)
            self.fs.setcontents(self.path, self.wrapped_file)
            self.wrapped_file.seek(pos)
    
    def close(self):
        with self._lock:
            if not self.closed:
                self._setcontents()
                if self._rfile is not None:
                    self._rfile.close()
                super(RemoteFileBuffer,self).close()


class ConnectionManagerFS(LazyFS):
    """FS wrapper providing simple connection management of a remote FS.

    The ConnectionManagerFS class is designed to wrap a remote FS object
    and provide some convenience methods for dealing with its remote
    connection state.

    The boolean attribute 'connected' indicates whether the remote filesystem
    has an active connection, and is initially True.  If any of the remote
    filesystem methods raises a RemoteConnectionError, 'connected' will
    switch to False and remain so until a successful remote method call.

    Application code can use the method 'wait_for_connection' to block
    until the connection is re-established.  Currently this reconnection
    is checked by a simple polling loop; eventually more sophisticated
    operating-system integration may be added.

    Since some remote FS classes can raise RemoteConnectionError during
    initialisation, this class makes use of lazy initialization. The
    remote FS can be specified as an FS instance, an FS subclass, or a
    (class,args) or (class,args,kwds) tuple. For example::

        >>> fs = ConnectionManagerFS(MyRemoteFS("http://www.example.com/"))
        Traceback (most recent call last):
            ...
        RemoteConnectionError: couldn't connect to "http://www.example.com/"
        >>> fs = ConnectionManagerFS((MyRemoteFS,["http://www.example.com/"]))
        >>> fs.connected
        False
        >>>
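
    A typical pattern for application code is to retry an operation once
    the connection has come back (a usage sketch; the path and timeout
    are arbitrary)::

        while True:
            try:
                data = fs.getcontents("status.txt")
                break
            except RemoteConnectionError:
                fs.wait_for_connection(timeout=30)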

    """

    poll_interval = 1

    def __init__(self,wrapped_fs,poll_interval=None,connected=True):
        super(ConnectionManagerFS,self).__init__(wrapped_fs)
        if poll_interval is not None:
            self.poll_interval = poll_interval
        self._connection_cond = threading.Condition()
        self._poll_thread = None
        self._poll_sleeper = threading.Event()
        self.connected = connected

    def setcontents(self, path, data, chunk_size=64*1024):
        return self.wrapped_fs.setcontents(path, data, chunk_size=chunk_size)

    def __getstate__(self):
        state = super(ConnectionManagerFS,self).__getstate__()
        del state["_connection_cond"]
        del state["_poll_sleeper"]
        state["_poll_thread"] = None
        return state

    def __setstate__(self,state):
        super(ConnectionManagerFS,self).__setstate__(state)
        self._connection_cond = threading.Condition()
        self._poll_sleeper = threading.Event()
        
    def wait_for_connection(self,timeout=None):
        """Block until the remote connection is re-established.

        If 'timeout' is given, wait for at most that many seconds before
        returning, whether or not the connection has come back.
        """
        self._connection_cond.acquire()
        try:
            if not self.connected:
                if not self._poll_thread:
                    target = self._poll_connection
                    self._poll_thread = threading.Thread(target=target)
                    self._poll_thread.daemon = True
                    self._poll_thread.start()
                self._connection_cond.wait(timeout)
        finally:
            self._connection_cond.release()

    def _poll_connection(self):
        while not self.connected and not self.closed:
            try:
                self.wrapped_fs.isdir("")
            except RemoteConnectionError:
                self._poll_sleeper.wait(self.poll_interval)
                self._poll_sleeper.clear()
            except FSError:
                break
            else:
                break
        self._connection_cond.acquire()
        try:
            if not self.closed:
                self.connected = True
            self._poll_thread = None
            self._connection_cond.notify_all()
        finally:
            self._connection_cond.release()

    def close(self):
        if not self.closed:
            try:
                super(ConnectionManagerFS,self).close()
            except (RemoteConnectionError,):
                pass
            if self._poll_thread:
                self.connected = True
                self._poll_sleeper.set()
                self._poll_thread.join()
                self._poll_thread = None

def _ConnectionManagerFS_method_wrapper(func):
    """Method wrapper for ConnectionManagerFS.

    This method wrapper keeps an eye out for RemoteConnectionErrors and
    adjusts self.connected accordingly.
    """
    @wraps(func)
    def wrapper(self,*args,**kwds):
        try:
            result = func(self,*args,**kwds)
        except RemoteConnectionError:
            self.connected = False
            raise
        except FSError:
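            #  Any other FSError means the remote server actually responded,
            #  so the connection must be alive.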
            self.connected = True
            raise
        else:
            self.connected = True
            return result
    return wrapper
 
wrap_fs_methods(_ConnectionManagerFS_method_wrapper,ConnectionManagerFS)


def _cached_method(func):
    """Method decorator that caches results for CacheFS."""
    @wraps(func)
    def wrapper(self,path="",*args,**kwds):
        try:
            (success,result) = self._cache_get(path,func.__name__,args,kwds)
        except KeyError:
            try:
                res = func(self,path,*args,**kwds)
            except Exception as e:
                self._cache_set(path,func.__name__,args,kwds,(False,e))
                raise
            else:
                self._cache_set(path,func.__name__,args,kwds,(True,res))
                return copy.copy(res)
        else:
            if not success:
                raise result
            else:
                return copy.copy(result)
    return wrapper


class CacheFS(WrapFS):
    """Simple wrapper to cache meta-data of a remote filesystems.

    This FS wrapper implements a simplistic cache that can help speed up
    access to a remote filesystem.  File and directory meta-data is cached
    but the actual file contents are not.
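
    For example (a usage sketch; MyRemoteFS is a stand-in for any remote
    FS implementation)::

        fs = CacheFS(MyRemoteFS("http://www.example.com/"), timeout=5)
        fs.getinfo("some/file")   # hits the remote filesystem
        fs.getinfo("some/file")   # served from the local cache
        fs.remove("some/file")    # mutations invalidate cached entries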
    """

    def __init__(self,fs,timeout=1):
        """CacheFS constructor.

        The optional argument 'timeout' specifies the cache timeout in
        seconds.  The default timeout is 1 second.  To prevent cache
        entries from ever timing out, set it to None.
        """
        self.timeout = timeout
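        #  The cache is a tree of nested dicts mirroring the directory
        #  structure: each node maps child names to sub-caches, with the
        #  special key "" holding cached method results for that path.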
        self._cache = {"":{}}
        super(CacheFS,self).__init__(fs)

    def _path_cache(self,path):
        cache = self._cache
        for name in iteratepath(path):
            cache = cache.setdefault(name,{"":{}})
        return cache

    def _cache_get(self,path,func,args,kwds):
        now = time.time()
        cache = self._path_cache(path)
        key = (tuple(args),tuple(sorted(kwds.iteritems())))
        (t,v) = cache[""][func][key]
        if self.timeout is not None:
            if t < now - self.timeout:
                raise KeyError
        return v

    def _cache_set(self,path,func,args,kwds,v):
        t = time.time()
        cache = self._path_cache(path)
        key = (tuple(args),tuple(sorted(kwds.iteritems())))
        cache[""].setdefault(func,{})[key] = (t,v)

    def _uncache(self,path,added=False,removed=False,unmoved=False):
        """Discard cached data for 'path' and invalidate affected ancestors."""
        cache = self._cache
        names = list(iteratepath(path))
        # If it's not the root dir, also clear some items for ancestors
        if names:
            # Clear cached 'getinfo' and 'getsize' for all ancestors 
            for name in names[:-1]:
                cache[""].pop("getinfo",None)
                cache[""].pop("getsize",None)
                cache = cache.get(name,None)
                if cache is None:
                    return 
            # Adjust cached 'listdir' for parent directory.
            # TODO: account for whether it was added, removed, or unmoved
            cache[""].pop("getinfo",None)
            cache[""].pop("getsize",None)
            cache[""].pop("listdir",None)
            cache[""].pop("listdirinfo",None)
        # Clear all cached info for the path itself.
        if names:
            cache[names[-1]] = {"":{}}
        else:
            cache[""] = {}

    @_cached_method
    def exists(self,path):
        return super(CacheFS,self).exists(path)

    @_cached_method
    def isdir(self,path):
        return super(CacheFS,self).isdir(path)

    @_cached_method
    def isfile(self,path):
        return super(CacheFS,self).isfile(path)

    @_cached_method
    def listdir(self,path="",**kwds):
        return super(CacheFS,self).listdir(path,**kwds)

    @_cached_method
    def listdirinfo(self,path="",**kwds):
        return super(CacheFS,self).listdirinfo(path,**kwds)

    @_cached_method
    def getinfo(self,path):
        return super(CacheFS,self).getinfo(path)

    @_cached_method
    def getsize(self,path):
        return super(CacheFS,self).getsize(path)

    @_cached_method
    def getxattr(self,path,name,default=None):
        return super(CacheFS,self).getxattr(path,name,default)

    @_cached_method
    def listxattrs(self,path):
        return super(CacheFS,self).listxattrs(path)

    def open(self,path,mode="r"):
        f = super(CacheFS,self).open(path,mode)
        self._uncache(path,unmoved=True)
        return f

    def setcontents(self, path, contents='', chunk_size=64*1024):
        res = super(CacheFS,self).setcontents(path, contents, chunk_size=chunk_size)
        self._uncache(path,unmoved=True)
        return res

    def getcontents(self,path):
        res = super(CacheFS,self).getcontents(path)
        self._uncache(path,unmoved=True)
        return res

    def makedir(self,path,**kwds):
        super(CacheFS,self).makedir(path,**kwds)
        self._uncache(path,added=True)

    def remove(self,path):
        super(CacheFS,self).remove(path)
        self._uncache(path,removed=True)

    def removedir(self,path,**kwds):
        super(CacheFS,self).removedir(path,**kwds)
        self._uncache(path,removed=True)

    def rename(self,src,dst):
        super(CacheFS,self).rename(src,dst)
        self._uncache(src,removed=True)
        self._uncache(dst,added=True)

    def copy(self,src,dst,**kwds):
        super(CacheFS,self).copy(src,dst,**kwds)
        self._uncache(dst,added=True)

    def copydir(self,src,dst,**kwds):
        super(CacheFS,self).copydir(src,dst,**kwds)
        self._uncache(dst,added=True)

    def move(self,src,dst,**kwds):
        super(CacheFS,self).move(src,dst,**kwds)
        self._uncache(src,removed=True)
        self._uncache(dst,added=True)

    def movedir(self,src,dst,**kwds):
        super(CacheFS,self).movedir(src,dst,**kwds)
        self._uncache(src,removed=True)
        self._uncache(dst,added=True)

    def setxattr(self,path,name,value):
        self._uncache(path,unmoved=True)
        return super(CacheFS,self).setxattr(path,name,value)

    def delxattr(self,path,name):
        self._uncache(path,unmoved=True)
        return super(CacheFS,self).delxattr(path,name)