summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Pavol Sakac <26042917+pavsa@users.noreply.github.com> 2019-05-05 21:04:30 +0200
committer: Joel Rosdahl <joel@rosdahl.net> 2019-05-05 21:04:30 +0200
commit: eae97078ab2b0c7c8bcf1f38ebe8fe64600d05ad (patch)
tree: 0c62103bcea6fe7b2e4c0a6955f327acf5b40e86
parent: 2be7c2698c6d56ec480a6ab188d02181e18f5d63 (diff)
download: ccache-eae97078ab2b0c7c8bcf1f38ebe8fe64600d05ad.tar.gz
Fix object size verification + bump to 64 bit file sizes in manifest (#407)
Changed the manifest format to save the actual file size along with the hashed content size. The file size field in the manifest is now 64 bits wide. Manifest version set to 2. Fixes #382.
-rw-r--r-- src/ccache.c 10
-rw-r--r-- src/hashutil.c 2
-rw-r--r-- src/hashutil.h 2
-rw-r--r-- src/manifest.c 87
-rw-r--r-- src/manifest.h 2
-rw-r--r-- test/suites/depend.bash 1
-rw-r--r-- test/suites/direct.bash 24
7 files changed, 83 insertions, 45 deletions
diff --git a/src/ccache.c b/src/ccache.c
index 7fa94146..590cf866 100644
--- a/src/ccache.c
+++ b/src/ccache.c
@@ -742,13 +742,13 @@ remember_include_file(char *path, struct hash *cpp_hash, bool system,
struct file_hash *h = x_malloc(sizeof(*h));
hash_result_as_bytes(fhash, h->hash);
- h->size = hash_input_size(fhash);
+ h->hashed_content_size = hash_input_size(fhash);
hashtable_insert(included_files, path, h);
path = NULL; // Ownership transferred to included_files.
if (depend_mode_hash) {
hash_delimiter(depend_mode_hash, "include");
- char *result = format_hash_as_string(h->hash, h->size);
+ char *result = format_hash_as_string(h->hash, h->hashed_content_size);
hash_string(depend_mode_hash, result);
free(result);
}
@@ -1169,7 +1169,7 @@ object_hash_from_depfile(const char *depfile, struct hash *hash)
struct file_hash *result = x_malloc(sizeof(*result));
hash_result_as_bytes(hash, result->hash);
- result->size = hash_input_size(hash);
+ result->hashed_content_size = hash_input_size(hash);
return result;
}
@@ -1366,7 +1366,7 @@ update_manifest_file(void)
static void
update_cached_result_globals(struct file_hash *hash)
{
- char *object_name = format_hash_as_string(hash->hash, hash->size);
+ char *object_name = format_hash_as_string(hash->hash, hash->hashed_content_size);
cached_obj_hash = hash;
cached_obj = get_path_in_cache(object_name, ".o");
cached_stderr = get_path_in_cache(object_name, ".stderr");
@@ -1728,7 +1728,7 @@ get_object_name_from_cpp(struct args *args, struct hash *hash)
struct file_hash *result = x_malloc(sizeof(*result));
hash_result_as_bytes(hash, result->hash);
- result->size = hash_input_size(hash);
+ result->hashed_content_size = hash_input_size(hash);
return result;
}
diff --git a/src/hashutil.c b/src/hashutil.c
index edf446c4..5f2ae791 100644
--- a/src/hashutil.c
+++ b/src/hashutil.c
@@ -41,7 +41,7 @@ int
file_hashes_equal(struct file_hash *fh1, struct file_hash *fh2)
{
return memcmp(fh1->hash, fh2->hash, 16) == 0
- && fh1->size == fh2->size;
+ && fh1->hashed_content_size == fh2->hashed_content_size;
}
// Search for the strings "__DATE__" and "__TIME__" in str.
diff --git a/src/hashutil.h b/src/hashutil.h
index b6b916ef..3dc7ab9a 100644
--- a/src/hashutil.h
+++ b/src/hashutil.h
@@ -24,7 +24,7 @@
struct file_hash
{
uint8_t hash[16];
- uint32_t size;
+ uint32_t hashed_content_size;
};
unsigned hash_from_string(void *str);
diff --git a/src/manifest.c b/src/manifest.c
index 5e0bcac6..83288512 100644
--- a/src/manifest.c
+++ b/src/manifest.c
@@ -24,22 +24,23 @@
// Sketchy specification of the manifest disk format:
//
-// <magic> magic number (4 bytes)
-// <version> file format version (1 byte unsigned int)
-// <hash_size> size of the hash fields (in bytes) (1 byte unsigned int)
-// <reserved> reserved for future use (2 bytes)
+// <magic> magic number (4 bytes)
+// <version> file format version (1 byte unsigned int)
+// <hash_size> size of the hash fields (in bytes) (1 byte unsigned int)
+// <reserved> reserved for future use (2 bytes)
// ----------------------------------------------------------------------------
-// <n> number of include file paths (4 bytes unsigned int)
-// <path_0> path to include file (NUL-terminated string,
+// <n> number of include file paths (4 bytes unsigned int)
+// <path_0> path to include file (NUL-terminated string,
// ... at most 1024 bytes)
// <path_n-1>
// ----------------------------------------------------------------------------
-// <n> number of include file hash entries (4 bytes unsigned int)
-// <index[0]> index of include file path (4 bytes unsigned int)
-// <hash[0]> hash of include file (<hash_size> bytes)
-// <size[0]> size of include file (4 bytes unsigned int)
-// <mtime[0]> mtime of include file (8 bytes signed int)
-// <ctime[0]> ctime of include file (8 bytes signed int)
+// <n> number of include file hash entries (4 bytes unsigned int)
+// <index[0]> index of include file path (4 bytes unsigned int)
+// <hash[0]> hash of include file (<hash_size> bytes)
+// <size[0]> size of include file (8 bytes unsigned int)
+// <hashed_content_size[0]> bytes passed through hash function (4 bytes unsigned int)
+// <mtime[0]> mtime of include file (8 bytes signed int)
+// <ctime[0]> ctime of include file (8 bytes signed int)
// ...
// <index[n-1]>
// <hash[n-1]>
@@ -47,16 +48,16 @@
// <mtime[n-1]>
// <ctime[n-1]>
// ----------------------------------------------------------------------------
-// <n> number of object name entries (4 bytes unsigned int)
-// <m[0]> number of include file hash indexes (4 bytes unsigned int)
-// <index[0][0]> include file hash index (4 bytes unsigned int)
+// <n> number of object name entries (4 bytes unsigned int)
+// <m[0]> number of include file hash indexes (4 bytes unsigned int)
+// <index[0][0]> include file hash index (4 bytes unsigned int)
// ...
// <index[0][m[0]-1]>
-// <hash[0]> hash part of object name (<hash_size> bytes)
-// <size[0]> size part of object name (4 bytes unsigned int)
+// <hash[0]> hash part of object name (<hash_size> bytes)
+// <size[0]> size part of object name (4 bytes unsigned int)
// ...
-// <m[n-1]> number of include file hash indexes
-// <index[n-1][0]> include file hash index
+// <m[n-1]> number of include file hash indexes
+// <index[n-1][0]> include file hash index
// ...
// <index[n-1][m[n-1]]>
// <hash[n-1]>
@@ -74,8 +75,10 @@ struct file_info {
uint32_t index;
// Hash of referenced file.
uint8_t hash[16];
+ // Hashed content byte count
+ uint32_t hashed_content_size;
// Size of referenced file.
- uint32_t size;
+ uint64_t size;
// mtime of referenced file.
int64_t mtime;
// ctime of referenced file.
@@ -115,7 +118,7 @@ struct manifest {
};
struct file_stats {
- uint32_t size;
+ uint64_t size;
int64_t mtime;
int64_t ctime;
};
@@ -123,7 +126,7 @@ struct file_stats {
static unsigned int
hash_from_file_info(void *key)
{
- ccache_static_assert(sizeof(struct file_info) == 40); // No padding.
+ ccache_static_assert(sizeof(struct file_info) == 48); // No padding.
return murmurhashneutral2(key, sizeof(struct file_info), 0);
}
@@ -135,6 +138,7 @@ file_infos_equal(void *key1, void *key2)
return fi1->index == fi2->index
&& memcmp(fi1->hash, fi2->hash, 16) == 0
&& fi1->size == fi2->size
+ && fi1->hashed_content_size == fi2->hashed_content_size
&& fi1->mtime == fi2->mtime
&& fi1->ctime == fi2->ctime;
}
@@ -261,7 +265,8 @@ read_manifest(gzFile f)
for (uint32_t i = 0; i < mf->n_file_infos; i++) {
READ_INT(4, mf->file_infos[i].index);
READ_BYTES(mf->hash_size, mf->file_infos[i].hash);
- READ_INT(4, mf->file_infos[i].size);
+ READ_INT(8, mf->file_infos[i].size);
+ READ_INT(4, mf->file_infos[i].hashed_content_size);
READ_INT(8, mf->file_infos[i].mtime);
READ_INT(8, mf->file_infos[i].ctime);
}
@@ -277,7 +282,7 @@ read_manifest(gzFile f)
READ_INT(4, mf->objects[i].file_info_indexes[j]);
}
READ_BYTES(mf->hash_size, mf->objects[i].hash.hash);
- READ_INT(4, mf->objects[i].hash.size);
+ READ_INT(4, mf->objects[i].hash.hashed_content_size);
}
return mf;
@@ -335,7 +340,8 @@ write_manifest(gzFile f, const struct manifest *mf)
for (uint32_t i = 0; i < mf->n_file_infos; i++) {
WRITE_INT(4, mf->file_infos[i].index);
WRITE_BYTES(mf->hash_size, mf->file_infos[i].hash);
- WRITE_INT(4, mf->file_infos[i].size);
+ WRITE_INT(8, mf->file_infos[i].size);
+ WRITE_INT(4, mf->file_infos[i].hashed_content_size);
WRITE_INT(8, mf->file_infos[i].mtime);
WRITE_INT(8, mf->file_infos[i].ctime);
}
@@ -347,7 +353,7 @@ write_manifest(gzFile f, const struct manifest *mf)
WRITE_INT(4, mf->objects[i].file_info_indexes[j]);
}
WRITE_BYTES(mf->hash_size, mf->objects[i].hash.hash);
- WRITE_INT(4, mf->objects[i].hash.size);
+ WRITE_INT(4, mf->objects[i].hash.hashed_content_size);
}
return 1;
@@ -377,6 +383,7 @@ verify_object(struct conf *conf, struct manifest *mf, struct object *obj,
hashtable_insert(stated_files, x_strdup(path), st);
}
+ // Compare actual file sizes
if (fi->size != st->size) {
return 0;
}
@@ -424,12 +431,12 @@ verify_object(struct conf *conf, struct manifest *mf, struct object *obj,
}
actual = x_malloc(sizeof(*actual));
hash_result_as_bytes(hash, actual->hash);
- actual->size = hash_input_size(hash);
+ actual->hashed_content_size = hash_input_size(hash);
hashtable_insert(hashed_files, x_strdup(path), actual);
hash_free(hash);
}
if (memcmp(fi->hash, actual->hash, mf->hash_size) != 0
- || fi->size != actual->size) {
+ || fi->hashed_content_size != actual->hashed_content_size) {
return 0;
}
}
@@ -491,7 +498,7 @@ get_file_hash_index(struct manifest *mf,
struct file_info fi;
fi.index = get_include_file_index(mf, path, mf_files);
memcpy(fi.hash, file_hash->hash, sizeof(fi.hash));
- fi.size = file_hash->size;
+ fi.hashed_content_size = file_hash->hashed_content_size;
// file_stat.st_{m,c}time has a resolution of 1 second, so we can cache the
// file's mtime and ctime only if they're at least one second older than
@@ -501,13 +508,20 @@ get_file_hash_index(struct manifest *mf,
// MAX(mtime, ctime).
struct stat file_stat;
- if (stat(path, &file_stat) != -1
- && time_of_compilation > MAX(file_stat.st_mtime, file_stat.st_ctime)) {
- fi.mtime = file_stat.st_mtime;
- fi.ctime = file_stat.st_ctime;
+ if (stat(path, &file_stat) != -1) {
+ if (time_of_compilation > MAX(file_stat.st_mtime, file_stat.st_ctime)) {
+ fi.mtime = file_stat.st_mtime;
+ fi.ctime = file_stat.st_ctime;
+ }
+ else {
+ fi.mtime = -1;
+ fi.ctime = -1;
+ }
+ fi.size = file_stat.st_size;
} else {
fi.mtime = -1;
fi.ctime = -1;
+ fi.size = file_hash->hashed_content_size;
}
uint32_t *fi_index = hashtable_search(mf_file_infos, &fi);
@@ -566,7 +580,7 @@ add_object_entry(struct manifest *mf,
obj->file_info_indexes = x_malloc(n_fii * sizeof(*obj->file_info_indexes));
add_file_info_indexes(obj->file_info_indexes, n_fii, mf, included_files);
memcpy(obj->hash.hash, object_hash->hash, mf->hash_size);
- obj->hash.size = object_hash->size;
+ obj->hash.hashed_content_size = object_hash->hashed_content_size;
}
// Try to get the object hash from a manifest file. Caller frees. Returns NULL
@@ -767,7 +781,8 @@ manifest_dump(const char *manifest_path, FILE *stream)
hash = format_hash_as_string(mf->file_infos[i].hash, -1);
fprintf(stream, " Hash: %s\n", hash);
free(hash);
- fprintf(stream, " Size: %u\n", mf->file_infos[i].size);
+ fprintf(stream, " File size: %"PRIu64"\n", mf->file_infos[i].size);
+ fprintf(stream, " Hashed bytes: %u\n", mf->file_infos[i].hashed_content_size);
fprintf(stream, " Mtime: %lld\n", (long long)mf->file_infos[i].mtime);
fprintf(stream, " Ctime: %lld\n", (long long)mf->file_infos[i].ctime);
}
@@ -783,7 +798,7 @@ manifest_dump(const char *manifest_path, FILE *stream)
hash = format_hash_as_string(mf->objects[i].hash.hash, -1);
fprintf(stream, " Hash: %s\n", hash);
free(hash);
- fprintf(stream, " Size: %u\n", (unsigned)mf->objects[i].hash.size);
+ fprintf(stream, " Size: %u\n", (unsigned)mf->objects[i].hash.hashed_content_size);
}
ret = true;
diff --git a/src/manifest.h b/src/manifest.h
index e116c349..98022d1a 100644
--- a/src/manifest.h
+++ b/src/manifest.h
@@ -5,7 +5,7 @@
#include "hashutil.h"
#include "hashtable.h"
-#define MANIFEST_VERSION 1
+#define MANIFEST_VERSION 2
struct file_hash *manifest_get(struct conf *conf, const char *manifest_path);
bool manifest_put(const char *manifest_path, struct file_hash *object_hash,
diff --git a/test/suites/depend.bash b/test/suites/depend.bash
index a13f8426..97b7858e 100644
--- a/test/suites/depend.bash
+++ b/test/suites/depend.bash
@@ -294,6 +294,5 @@ EOF
expect_stat 'files in cache' 9
# -------------------------------------------------------------------------
-
# TODO: Add more test cases (see direct.bash for inspiration)
}
diff --git a/test/suites/direct.bash b/test/suites/direct.bash
index 13730443..c73212fb 100644
--- a/test/suites/direct.bash
+++ b/test/suites/direct.bash
@@ -759,6 +759,30 @@ EOF
expect_stat 'cache miss' 1
# -------------------------------------------------------------------------
+ TEST "__DATE__ in header file results in direct cache hit as the date remains the same"
+
+ cat <<EOF >test_date2.c
+// test_date2.c
+#include "test_date2.h"
+char date_str[] = MACRO_STRING;
+EOF
+ cat <<EOF >test_date2.h
+#define MACRO_STRING __DATE__
+EOF
+
+ backdate test_date2.c test_date2.h
+
+ $CCACHE_COMPILE -MP -MMD -MF test_date2.d -c test_date2.c
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ $CCACHE_COMPILE -MP -MMD -MF test_date2.d -c test_date2.c
+ expect_stat 'cache hit (direct)' 1
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ # -------------------------------------------------------------------------
TEST "New include file ignored if sloppy"
cat <<EOF >new.c