summary refs log tree commit diff
path: root/src/InodeCache.hpp
blob: eea3b49d662bf56ab2764353d56b976145b87ac5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
// Copyright (C) 2020-2023 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3 of the License, or (at your option)
// any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 51
// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

#pragma once

#include <Fd.hpp>
#include <hashutil.hpp>
#include <util/Duration.hpp>
#include <util/TimePoint.hpp>

#include <cstdint>
#include <functional>
#include <string>

class Config;
class Context;
class Digest;

class InodeCache
{
public:
  // Describes the mode in which a file was hashed. The mode has to be recorded
  // because the result of hashing depends not just on the file's content but
  // also on the extra operations performed, which affect the return value.
  // Source code files, for instance, are normally scanned for macros, whereas
  // binary files are not.
  enum class ContentType {
    // No scan for temporal macros was made on the file.
    raw = 0,
    // The file went through the temporal macro check (see
    // check_for_temporal_macros in hashutil).
    checked_for_temporal_macros = 1,
  };

  // `min_age` is how old a file has to be before it is put in the cache. It
  // exists to protect against the following race:
  //
  // 1. A file with content C1, size S and timestamp (ctime/mtime) T is
  //    written.
  // 2. Ccache hashes the content and asks the inode cache to store the digest,
  //    keyed by a hash of S and T (and some other data).
  // 3. Shortly afterwards the file is rewritten with content C2, leaving size
  //    S and timestamp T unchanged. T stays the same because both writes
  //    happened within an interval shorter than the granularity of the clock
  //    used for file system timestamps. At the time of writing, a common
  //    granularity on a Linux system is 0.004 s (250 Hz).
  // 4. The inode cache is queried for the file's digest and answers with the
  //    digest of C1, although the file now holds C2.
  //
  // The race is avoided by only caching inodes whose timestamp was updated
  // more than `min_age` ago. The default is a conservative 2 seconds, since
  // not all file systems have subsecond resolution.
  InodeCache(const Config& config, util::Duration min_age = util::Duration(2));
  ~InodeCache();

  // Tell whether the inode cache can be used on the filesystem associated
  // with `fd`.
  static bool available(int fd);

  // Look up the hash digest and return value saved from a previous call to
  // do_hash_file() in hashutil.cpp.
  //
  // Returns true if the saved values could be retrieved from the cache,
  // otherwise false.
  bool get(const std::string& path,
           ContentType type,
           Digest& file_digest,
           HashSourceCodeResult* return_value = nullptr);

  // Save the hash digest and return value from a successful call to
  // do_hash_file() in hashutil.cpp.
  //
  // Returns true if the values could be stored in the cache, otherwise false.
  bool put(const std::string& path,
           ContentType type,
           const Digest& file_digest,
           HashSourceCodeResult return_value);

  // Unmap the current cache and remove the mapped file from disk.
  //
  // Returns true on success, otherwise false.
  bool drop();

  // Name of the persistent file.
  std::string get_file();

  // Total number of cache hits.
  //
  // Counters are incremented in debug mode only.
  int64_t get_hits();

  // Total number of cache misses.
  //
  // Counters are incremented in debug mode only.
  int64_t get_misses();

  // Total number of errors.
  //
  // Only lock errors are currently counted, because the counter cannot be
  // accessed until the file has been successfully mapped into memory.
  //
  // Counters are incremented in debug mode only.
  int64_t get_errors();

private:
  // Nested types; only declared here, their definitions are not part of this
  // header.
  struct Bucket;
  struct Entry;
  struct Key;
  struct SharedRegion;

  // Callable that is handed a pointer to a bucket (see with_bucket()).
  using BucketHandler = std::function<void(Bucket*)>;

  static bool create_new_file(const std::string& filename);

  bool initialize();
  bool mmap_file(const std::string& inode_cache_file);
  bool hash_inode(const std::string& path, ContentType type, Digest& digest);
  bool with_bucket(const Digest& key_digest,
                   const BucketHandler& bucket_handler);

  // Data members. Their declaration order is kept as is since it determines
  // the order of initialization and destruction.
  const Config& m_config;
  util::Duration m_min_age;
  Fd m_fd;
  SharedRegion* m_sr = nullptr;
  bool m_failed = false;
  const pid_t m_self_pid;
  util::TimePoint m_last_fs_space_check;
};