path: root/buildstream/_yamlcache.py
#
#  Copyright 2018 Bloomberg Finance LP
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU Lesser General Public
#  License as published by the Free Software Foundation; either
#  version 2 of the License, or (at your option) any later version.
#
#  This library is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public
#  License along with this library. If not, see <http://www.gnu.org/licenses/>.
#
#  Authors:
#        Jonathan Maw <jonathan.maw@codethink.co.uk>

import os
import pickle
import hashlib
import io
import sys

from contextlib import contextmanager
from collections import namedtuple

from ._context import Context
from . import _yaml


YAML_CACHE_FILENAME = "yaml_cache.pickle"


# YamlCache()
#
# A cache that wraps around the loading of yaml in projects.
#
# The recommended way to use a YamlCache is:
#   with YamlCache.open(context, cachefile) as yamlcache:
#     # Load all the yaml
#     ...
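#
# Within that block, a typical load might look like this (a sketch; the
# load_yaml_file() helper and its arguments are illustrative, not part of
# this module):
#   data, key = yamlcache.get(project, filepath, copy_tree)
#   if data is None:
#       data = load_yaml_file(filepath, copy_tree)
#       yamlcache.put_from_key(project, filepath, key, data)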
#
# Args:
#    context (Context): The invocation Context
#
class YamlCache():

    def __init__(self, context):
        self._project_caches = {}
        self._context = context

    ##################
    # Public Methods #
    ##################

    # is_cached():
    #
    # Checks whether a file is cached.
    #
    # Args:
    #    project (Project): The project this file is in.
    #    filepath (str): The path to the file, *relative to the project's directory*.
    #
    # Returns:
    #    (bool): Whether the file is cached.
    def is_cached(self, project, filepath):
        cache_path = self._get_filepath(project, filepath)
        project_name = self.get_project_name(project)
        try:
            project_cache = self._project_caches[project_name]
            if cache_path in project_cache.elements:
                return True
        except KeyError:
            pass
        return False

    # open():
    #
    # Return an instance of the YamlCache which writes to disk when it leaves scope.
    #
    # Args:
    #    context (Context): The context.
    #    cachefile (str): The path to the cache file.
    #
    # Returns:
    #    (YamlCache): A YamlCache.
    @staticmethod
    @contextmanager
    def open(context, cachefile):
        # Try to load from disk first
        cache = None
        if os.path.exists(cachefile):
            try:
                with open(cachefile, "rb") as f:
                    cache = BstUnpickler(f, context).load()
            except EOFError:
                # The file was empty
                pass
            except pickle.UnpicklingError as e:
                sys.stderr.write("Failed to load YamlCache, {}\n".format(e))

        # Failed to load from disk, create a new one
        if not cache:
            cache = YamlCache(context)

        yield cache

        cache._write(cachefile)

    # get_cache_file():
    #
    # Retrieves a path to the yaml cache file.
    #
    # Args:
    #   top_dir (str): The top directory of the project
    #
    # Returns:
    #   (str): The path to the cache file
    @staticmethod
    def get_cache_file(top_dir):
        return os.path.join(top_dir, ".bst", YAML_CACHE_FILENAME)

    # get():
    #
    # Gets a parsed file from the cache.
    #
    # Args:
    #    project (Project) or None: The project this file is in, if it exists.
    #    filepath (str): The absolute path to the file.
    #    copy_tree (bool): Whether a copy of the tree should be made when the
    #                      data is loaded (i.e. the same value as passed to _yaml.load())
    #
    # Returns:
    #    (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache.
    #    (str):            The key used to look up the parsed yaml in the cache
    def get(self, project, filepath, copy_tree):
        key = self._calculate_key(project, filepath, copy_tree)
        data = self._get(project, filepath, key)
        return data, key

    # put():
    #
    # Puts a parsed file into the cache.
    #
    # Args:
    #    project (Project): The project this file is in.
    #    filepath (str): The path to the file.
    #    copy_tree (bool): Whether a copy of the tree should be made when the
    #                      data is loaded (i.e. the same value as passed to _yaml.load())
    #    value (decorated dict): The data to put into the cache.
    def put(self, project, filepath, copy_tree, value):
        key = self._calculate_key(project, filepath, copy_tree)
        self.put_from_key(project, filepath, key, value)

    # put_from_key():
    #
    # Put a parsed file into the cache when given a key.
    #
    # Args:
    #    project (Project): The project this file is in.
    #    filepath (str): The path to the file.
    #    key (str): The key to the file within the cache. Typically, this is
    #               the value of `_calculate_key()` for this file and its
    #               relevant metadata.
    #    value (decorated dict): The data to put into the cache.
    def put_from_key(self, project, filepath, key, value):
        cache_path = self._get_filepath(project, filepath)
        project_name = self.get_project_name(project)
        try:
            project_cache = self._project_caches[project_name]
        except KeyError:
            project_cache = self._project_caches[project_name] = CachedProject({})

        project_cache.elements[cache_path] = CachedYaml(key, value)

    ###################
    # Private Methods #
    ###################

    # _write():
    #
    # Writes the yaml cache to the specified path.
    #
    # Args:
    #    path (str): The path to the cache file.
    def _write(self, path):
        parent_dir = os.path.dirname(path)
        os.makedirs(parent_dir, exist_ok=True)
        with open(path, "wb") as f:
            BstPickler(f).dump(self)

    # _get_filepath():
    #
    # Returns a file path relative to a project if passed, or the original path if
    # the project is None
    #
    # Args:
    #    project (Project) or None: The project the filepath exists within
    #    full_path (str): The path that the returned path is based on
    #
    # Returns:
    #    (str): The path to the file, relative to a project if it exists
    def _get_filepath(self, project, full_path):
        if project:
            assert full_path.startswith(project.directory)
            filepath = os.path.relpath(full_path, project.directory)
        else:
            filepath = full_path
        return filepath

    # _calculate_key():
    #
    # Calculates a key for putting into the cache.
    #
    # Args:
    #    project (Project) or None: The project this file is in.
    #    filepath (str): The path to the file.
    #    copy_tree (bool): Whether a copy of the tree should be made when the
    #                      data is loaded (i.e. the same value as passed to _yaml.load())
    #
    # Returns:
    #   (str): A key made out of every arg passed in
    @staticmethod
    def _calculate_key(project, filepath, copy_tree):
        if project and project.junction:
            # files in a junction only change if the junction element changes
            # NOTE: This may change when junction workspaces are revisited/fixed
            content_key = project.junction._get_cache_key()
        else:
            stat = os.stat(filepath)
            content_key = stat.st_mtime
        # copy_tree forms part of the key so that copied and uncopied trees
        # are cached separately
        string = pickle.dumps((content_key, copy_tree))
        return hashlib.sha1(string).hexdigest()

    # _get():
    #
    # Gets a parsed file from the cache when given a key.
    #
    # Args:
    #    project (Project): The project this file is in.
    #    filepath (str): The path to the file.
    #    key (str): The key to the file within the cache. Typically, this is
    #               the value of `_calculate_key()` for this file and its
    #               relevant metadata.
    #
    # Returns:
    #    (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache.
    def _get(self, project, filepath, key):
        cache_path = self._get_filepath(project, filepath)
        project_name = self.get_project_name(project)
        try:
            project_cache = self._project_caches[project_name]
            try:
                cachedyaml = project_cache.elements[cache_path]
                if cachedyaml._key == key:
                    # We've unpickled the YamlCache, but not the specific file
                    if cachedyaml._contents is None:
                        cachedyaml._contents = BstUnpickler.loads(cachedyaml._pickled_contents, self._context)
                    return cachedyaml._contents
            except KeyError:
                pass
        except KeyError:
            pass
        return None

    # get_project_name():
    #
    # Gets a name that identifies a Project. A project's junction name is used
    # if present; otherwise files at the same path in identically-named
    # projects would be considered the same yaml object, despite existing in
    # different Projects.
    #
    # Args:
    #    project (Project): The project this file is in, or None.
    #
    # Returns:
    #    (str): The project's junction's name if present, the project's name,
    #           or an empty string if there is no project
    @staticmethod
    def get_project_name(project):
        if project:
            if project.junction:
                project_name = project.junction.name
            else:
                project_name = project.name
        else:
            project_name = ""
        return project_name


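# CachedProject is a namedtuple holding the cache entries for one project;
# 'elements' maps a project-relative file path to its CachedYaml entry.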
CachedProject = namedtuple('CachedProject', ['elements'])


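# Holds a single cached yaml file: its cache key, its parsed contents, and a
# pickled copy of the contents used when the whole cache is written to disk.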
class CachedYaml():
    def __init__(self, key, contents):
        self._key = key
        self.set_contents(contents)

    # Sets the contents of the CachedYaml.
    #
    # Args:
    #    contents (provenanced dict): The contents to put in the cache.
    #
    def set_contents(self, contents):
        self._contents = contents
        self._pickled_contents = BstPickler.dumps(contents)

    # Pickling helper method, prevents 'contents' from being serialised
    def __getstate__(self):
        data = self.__dict__.copy()
        data['_contents'] = None
        return data


# In _yaml.load, we have a ProvenanceFile that stores the project the file
# came from. Projects can't be pickled, but it's always going to be the same
# project between invocations (unless the entire project is moved but the
# file stayed in the same place)
class BstPickler(pickle.Pickler):
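    # Returns a persistent id for objects that cannot be pickled directly:
    # ProvenanceFile (stored by project name and project-relative path) and
    # Context (stored as a bare tag). Returning None lets pickle serialise
    # the object as normal.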
    def persistent_id(self, obj):
        if isinstance(obj, _yaml.ProvenanceFile):
            if obj.project:
                # ProvenanceFile's project object cannot be stored as it is.
                project_tag = YamlCache.get_project_name(obj.project)
                # ProvenanceFile's filename must be stored relative to the
                # project, as the project dir may move.
                name = os.path.relpath(obj.name, obj.project.directory)
            else:
                project_tag = None
                name = obj.name
            return ("ProvenanceFile", name, obj.shortname, project_tag)
        elif isinstance(obj, Context):
            return ("Context",)
        else:
            return None

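    # Convenience helper that pickles an object to a bytes object using BstPickler.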
    @staticmethod
    def dumps(obj):
        stream = io.BytesIO()
        BstPickler(stream).dump(obj)
        stream.seek(0)
        return stream.read()


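# Unpickler counterpart to BstPickler: resolves the persistent ids written by
# BstPickler back into live ProvenanceFile and Context objects, using the
# invocation Context supplied at construction time.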
class BstUnpickler(pickle.Unpickler):
    def __init__(self, file, context):
        super().__init__(file)
        self._context = context

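    # Resolves a persistent id emitted by BstPickler.persistent_id() back into
    # a ProvenanceFile or the invocation Context.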
    def persistent_load(self, pid):
        if pid[0] == "ProvenanceFile":
            _, tagged_name, shortname, project_tag = pid

            if project_tag is not None:
                project = None
                for p in self._context.get_projects():
                    if YamlCache.get_project_name(p) == project_tag:
                        project = p
                        break

                if project is None:
                    projects = [YamlCache.get_project_name(p) for p in self._context.get_projects()]
                    raise pickle.UnpicklingError("No project with name {} found in {}"
                                                 .format(project_tag, projects))

                name = os.path.join(project.directory, tagged_name)
            else:
                project = None
                name = tagged_name

            return _yaml.ProvenanceFile(name, shortname, project)
        elif pid[0] == "Context":
            return self._context
        else:
            raise pickle.UnpicklingError("Unsupported persistent object, {}".format(pid))

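    # Convenience helper that unpickles a bytes object using BstUnpickler and
    # the given context.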
    @staticmethod
    def loads(text, context):
        stream = io.BytesIO()
        stream.write(bytes(text))
        stream.seek(0)
        return BstUnpickler(stream, context).load()