summaryrefslogtreecommitdiff
path: root/pelican/cache.py
blob: c1d4a55d1eb879a8b39f2ae30886b72652e1b46e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import hashlib
import logging
import os
import pickle

from pelican.utils import mkdir_p

logger = logging.getLogger(__name__)


class FileDataCacher:
    """Class that can cache data contained in files

    The cache is a dict mapping a file name to the data stored for it.
    It is read from and written to a single pickle file located at
    ``settings['CACHE_PATH']/<cache_name>``, optionally gzip-compressed.
    """

    def __init__(self, settings, cache_name, caching_policy, load_policy):
        """Load the specified cache within CACHE_PATH in settings

        only if *load_policy* is True,
        May use gzip if GZIP_CACHE in settings is True.
        Sets caching policy according to *caching_policy*.

        Any failure to read or unpickle the cache file is logged and
        results in an empty cache; it never aborts construction.
        """
        self.settings = settings
        self._cache_path = os.path.join(self.settings['CACHE_PATH'],
                                        cache_name)
        self._cache_data_policy = caching_policy
        if self.settings['GZIP_CACHE']:
            import gzip
            self._cache_open = gzip.open
        else:
            self._cache_open = open

        # Start empty; only replaced when a cache file loads successfully.
        self._cache = {}
        if not load_policy:
            return

        # NOTE(security): pickle.load can execute arbitrary code found in
        # the file, so CACHE_PATH must only contain files written by a
        # trusted build.
        try:
            with self._cache_open(self._cache_path, 'rb') as fhandle:
                self._cache = pickle.load(fhandle)
        except (OSError, UnicodeDecodeError) as err:
            logger.debug('Cannot load cache %s (this is normal on first '
                         'run). Proceeding with empty cache.\n%s',
                         self._cache_path, err)
        except (pickle.PickleError, EOFError, AttributeError,
                ImportError, IndexError) as err:
            # Besides PickleError, unpickling an empty, truncated or stale
            # file raises EOFError / AttributeError / ImportError /
            # IndexError; none of them should abort the build.
            logger.warning('Cannot unpickle cache %s, cache may be using '
                           'an incompatible protocol (see pelican '
                           'caching docs). '
                           'Proceeding with empty cache.\n%s',
                           self._cache_path, err)

    def cache_data(self, filename, data):
        """Cache data for given file

        Does nothing when the caching policy is falsy.
        """
        if self._cache_data_policy:
            self._cache[filename] = data

    def get_cached_data(self, filename, default=None):
        """Get cached data for the given file

        if no data is cached, return the default object
        """
        return self._cache.get(filename, default)

    def save_cache(self):
        """Save the updated cache

        Does nothing when the caching policy is falsy.  Failures are
        logged as warnings and otherwise ignored (best effort).
        """
        if not self._cache_data_policy:
            return
        try:
            # mkdir_p presumably creates CACHE_PATH when it is missing;
            # it is imported from pelican.utils and not visible here.
            mkdir_p(self.settings['CACHE_PATH'])
            with self._cache_open(self._cache_path, 'wb') as fhandle:
                pickle.dump(self._cache, fhandle)
        except (OSError, pickle.PicklingError, TypeError,
                AttributeError) as err:
            # AttributeError: some Python versions raise it (instead of
            # PicklingError) for unpicklable local objects.
            logger.warning('Could not save cache %s\n ... %s',
                           self._cache_path, err)


class FileStampDataCacher(FileDataCacher):
    """Subclass that also caches the stamp of the file

    Every entry is stored as a ``(stamp, data)`` tuple.  Cached data is
    only handed back while the file's current stamp equals the stored one.
    """

    def __init__(self, settings, cache_name, caching_policy, load_policy):
        """This subclass additionally sets the filestamp function

        The function is chosen from settings['CHECK_MODIFIED_METHOD']:
        'mtime' uses os.path.getmtime, any other value is looked up as an
        attribute of the hashlib module and applied to the file contents.
        If the lookup fails, a warning is logged and no stamp function is
        available.
        """

        super().__init__(settings, cache_name, caching_policy, load_policy)

        method = self.settings['CHECK_MODIFIED_METHOD']
        if method == 'mtime':
            self._filestamp_func = os.path.getmtime
            return

        try:
            hash_func = getattr(hashlib, method)
        except (AttributeError, TypeError) as err:
            # AttributeError: hashlib has no such name.
            # TypeError: the setting is not a string at all.
            logger.warning('Could not get hashing function\n\t%s', err)
            self._filestamp_func = None
            return

        def filestamp_func(filename):
            """return hash of file contents"""
            with open(filename, 'rb') as fhandle:
                return hash_func(fhandle.read()).digest()

        self._filestamp_func = filestamp_func

    def cache_data(self, filename, data):
        """Cache stamp and data for the given file"""
        stamp = self._get_file_stamp(filename)
        super().cache_data(filename, (stamp, data))

    def _get_file_stamp(self, filename):
        """Return the current modification stamp of the given file.

        depending on CHECK_MODIFIED_METHOD
        a float may be returned for 'mtime',
        a hash digest (bytes) for a function name in the hashlib module
        or an empty string when no stamp can be computed
        """

        if self._filestamp_func is None:
            # The missing hashing function was already reported in
            # __init__; do not repeat the warning for every file.
            return ''
        try:
            return self._filestamp_func(filename)
        except (OSError, TypeError) as err:
            # OSError: file missing/unreadable.  TypeError: the hashlib
            # attribute selected by the setting is not a usable hash
            # constructor.
            logger.warning('Cannot get modification stamp for %s\n\t%s',
                           filename, err)
            return ''

    def get_cached_data(self, filename, default=None):
        """Get the cached data for the given filename
        if the file has not been modified.

        If no record exists or file has been modified, return default.
        Modification is checked by comparing the cached
        and current file stamp.  A file whose stamp cannot be computed
        is treated as modified.
        """

        stamp, data = super().get_cached_data(filename, (None, default))
        if stamp is None:
            # No record: skip stamping, which would cost file I/O and may
            # log a warning for a path that cannot be read.
            return default

        current = self._get_file_stamp(filename)
        # '' means "stamp unknown"; two unknown stamps must not count as
        # a match, otherwise stale data would be served.
        if current == '' or stamp != current:
            return default
        return data