From eae97078ab2b0c7c8bcf1f38ebe8fe64600d05ad Mon Sep 17 00:00:00 2001 From: Pavol Sakac <26042917+pavsa@users.noreply.github.com> Date: Sun, 5 May 2019 21:04:30 +0200 Subject: Fix object size verification + bump to 64 bit file sizes in manifest (#407) Changed manifest format to save the actual file size along with hashed content size. File size field in manifest updated to 64bits. Manifest version set to 2. Fixes #382. --- src/ccache.c | 10 +++--- src/hashutil.c | 2 +- src/hashutil.h | 2 +- src/manifest.c | 87 +++++++++++++++++++++++++++++-------------------- src/manifest.h | 2 +- test/suites/depend.bash | 1 - test/suites/direct.bash | 24 ++++++++++++++ 7 files changed, 83 insertions(+), 45 deletions(-) diff --git a/src/ccache.c b/src/ccache.c index 7fa94146..590cf866 100644 --- a/src/ccache.c +++ b/src/ccache.c @@ -742,13 +742,13 @@ remember_include_file(char *path, struct hash *cpp_hash, bool system, struct file_hash *h = x_malloc(sizeof(*h)); hash_result_as_bytes(fhash, h->hash); - h->size = hash_input_size(fhash); + h->hashed_content_size = hash_input_size(fhash); hashtable_insert(included_files, path, h); path = NULL; // Ownership transferred to included_files. 
if (depend_mode_hash) { hash_delimiter(depend_mode_hash, "include"); - char *result = format_hash_as_string(h->hash, h->size); + char *result = format_hash_as_string(h->hash, h->hashed_content_size); hash_string(depend_mode_hash, result); free(result); } @@ -1169,7 +1169,7 @@ object_hash_from_depfile(const char *depfile, struct hash *hash) struct file_hash *result = x_malloc(sizeof(*result)); hash_result_as_bytes(hash, result->hash); - result->size = hash_input_size(hash); + result->hashed_content_size = hash_input_size(hash); return result; } @@ -1366,7 +1366,7 @@ update_manifest_file(void) static void update_cached_result_globals(struct file_hash *hash) { - char *object_name = format_hash_as_string(hash->hash, hash->size); + char *object_name = format_hash_as_string(hash->hash, hash->hashed_content_size); cached_obj_hash = hash; cached_obj = get_path_in_cache(object_name, ".o"); cached_stderr = get_path_in_cache(object_name, ".stderr"); @@ -1728,7 +1728,7 @@ get_object_name_from_cpp(struct args *args, struct hash *hash) struct file_hash *result = x_malloc(sizeof(*result)); hash_result_as_bytes(hash, result->hash); - result->size = hash_input_size(hash); + result->hashed_content_size = hash_input_size(hash); return result; } diff --git a/src/hashutil.c b/src/hashutil.c index edf446c4..5f2ae791 100644 --- a/src/hashutil.c +++ b/src/hashutil.c @@ -41,7 +41,7 @@ int file_hashes_equal(struct file_hash *fh1, struct file_hash *fh2) { return memcmp(fh1->hash, fh2->hash, 16) == 0 - && fh1->size == fh2->size; + && fh1->hashed_content_size == fh2->hashed_content_size; } // Search for the strings "__DATE__" and "__TIME__" in str. 
diff --git a/src/hashutil.h b/src/hashutil.h index b6b916ef..3dc7ab9a 100644 --- a/src/hashutil.h +++ b/src/hashutil.h @@ -24,7 +24,7 @@ struct file_hash { uint8_t hash[16]; - uint32_t size; + uint32_t hashed_content_size; }; unsigned hash_from_string(void *str); diff --git a/src/manifest.c b/src/manifest.c index 5e0bcac6..83288512 100644 --- a/src/manifest.c +++ b/src/manifest.c @@ -24,22 +24,23 @@ // Sketchy specification of the manifest disk format: // -// magic number (4 bytes) -// file format version (1 byte unsigned int) -// size of the hash fields (in bytes) (1 byte unsigned int) -// reserved for future use (2 bytes) +// magic number (4 bytes) +// file format version (1 byte unsigned int) +// size of the hash fields (in bytes) (1 byte unsigned int) +// reserved for future use (2 bytes) // ---------------------------------------------------------------------------- -// number of include file paths (4 bytes unsigned int) -// path to include file (NUL-terminated string, +// number of include file paths (4 bytes unsigned int) +// path to include file (NUL-terminated string, // ... at most 1024 bytes) // // ---------------------------------------------------------------------------- -// number of include file hash entries (4 bytes unsigned int) -// index of include file path (4 bytes unsigned int) -// hash of include file ( bytes) -// size of include file (4 bytes unsigned int) -// mtime of include file (8 bytes signed int) -// ctime of include file (8 bytes signed int) +// number of include file hash entries (4 bytes unsigned int) +// index of include file path (4 bytes unsigned int) +// hash of include file ( bytes) +// size of include file (8 bytes unsigned int) +// bytes passed through hash function (4 bytes unsigned int) +// mtime of include file (8 bytes signed int) +// ctime of include file (8 bytes signed int) // ... 
// // @@ -47,16 +48,16 @@ // // // ---------------------------------------------------------------------------- -// number of object name entries (4 bytes unsigned int) -// number of include file hash indexes (4 bytes unsigned int) -// include file hash index (4 bytes unsigned int) +// number of object name entries (4 bytes unsigned int) +// number of include file hash indexes (4 bytes unsigned int) +// include file hash index (4 bytes unsigned int) // ... // -// hash part of object name ( bytes) -// size part of object name (4 bytes unsigned int) +// hash part of object name ( bytes) +// size part of object name (4 bytes unsigned int) // ... -// number of include file hash indexes -// include file hash index +// number of include file hash indexes +// include file hash index // ... // // @@ -74,8 +75,10 @@ struct file_info { uint32_t index; // Hash of referenced file. uint8_t hash[16]; + // Hashed content byte count + uint32_t hashed_content_size; // Size of referenced file. - uint32_t size; + uint64_t size; // mtime of referenced file. int64_t mtime; // ctime of referenced file. @@ -115,7 +118,7 @@ struct manifest { }; struct file_stats { - uint32_t size; + uint64_t size; int64_t mtime; int64_t ctime; }; @@ -123,7 +126,7 @@ struct file_stats { static unsigned int hash_from_file_info(void *key) { - ccache_static_assert(sizeof(struct file_info) == 40); // No padding. + ccache_static_assert(sizeof(struct file_info) == 48); // No padding. 
return murmurhashneutral2(key, sizeof(struct file_info), 0); } @@ -135,6 +138,7 @@ file_infos_equal(void *key1, void *key2) return fi1->index == fi2->index && memcmp(fi1->hash, fi2->hash, 16) == 0 && fi1->size == fi2->size + && fi1->hashed_content_size == fi2->hashed_content_size && fi1->mtime == fi2->mtime && fi1->ctime == fi2->ctime; } @@ -261,7 +265,8 @@ read_manifest(gzFile f) for (uint32_t i = 0; i < mf->n_file_infos; i++) { READ_INT(4, mf->file_infos[i].index); READ_BYTES(mf->hash_size, mf->file_infos[i].hash); - READ_INT(4, mf->file_infos[i].size); + READ_INT(8, mf->file_infos[i].size); + READ_INT(4, mf->file_infos[i].hashed_content_size); READ_INT(8, mf->file_infos[i].mtime); READ_INT(8, mf->file_infos[i].ctime); } @@ -277,7 +282,7 @@ read_manifest(gzFile f) READ_INT(4, mf->objects[i].file_info_indexes[j]); } READ_BYTES(mf->hash_size, mf->objects[i].hash.hash); - READ_INT(4, mf->objects[i].hash.size); + READ_INT(4, mf->objects[i].hash.hashed_content_size); } return mf; @@ -335,7 +340,8 @@ write_manifest(gzFile f, const struct manifest *mf) for (uint32_t i = 0; i < mf->n_file_infos; i++) { WRITE_INT(4, mf->file_infos[i].index); WRITE_BYTES(mf->hash_size, mf->file_infos[i].hash); - WRITE_INT(4, mf->file_infos[i].size); + WRITE_INT(8, mf->file_infos[i].size); + WRITE_INT(4, mf->file_infos[i].hashed_content_size); WRITE_INT(8, mf->file_infos[i].mtime); WRITE_INT(8, mf->file_infos[i].ctime); } @@ -347,7 +353,7 @@ write_manifest(gzFile f, const struct manifest *mf) WRITE_INT(4, mf->objects[i].file_info_indexes[j]); } WRITE_BYTES(mf->hash_size, mf->objects[i].hash.hash); - WRITE_INT(4, mf->objects[i].hash.size); + WRITE_INT(4, mf->objects[i].hash.hashed_content_size); } return 1; @@ -377,6 +383,7 @@ verify_object(struct conf *conf, struct manifest *mf, struct object *obj, hashtable_insert(stated_files, x_strdup(path), st); } + // Compare actual file sizes if (fi->size != st->size) { return 0; } @@ -424,12 +431,12 @@ verify_object(struct conf *conf, struct manifest 
*mf, struct object *obj, } actual = x_malloc(sizeof(*actual)); hash_result_as_bytes(hash, actual->hash); - actual->size = hash_input_size(hash); + actual->hashed_content_size = hash_input_size(hash); hashtable_insert(hashed_files, x_strdup(path), actual); hash_free(hash); } if (memcmp(fi->hash, actual->hash, mf->hash_size) != 0 - || fi->size != actual->size) { + || fi->hashed_content_size != actual->hashed_content_size) { return 0; } } @@ -491,7 +498,7 @@ get_file_hash_index(struct manifest *mf, struct file_info fi; fi.index = get_include_file_index(mf, path, mf_files); memcpy(fi.hash, file_hash->hash, sizeof(fi.hash)); - fi.size = file_hash->size; + fi.hashed_content_size = file_hash->hashed_content_size; // file_stat.st_{m,c}time has a resolution of 1 second, so we can cache the // file's mtime and ctime only if they're at least one second older than @@ -501,13 +508,20 @@ get_file_hash_index(struct manifest *mf, // MAX(mtime, ctime). struct stat file_stat; - if (stat(path, &file_stat) != -1 - && time_of_compilation > MAX(file_stat.st_mtime, file_stat.st_ctime)) { - fi.mtime = file_stat.st_mtime; - fi.ctime = file_stat.st_ctime; + if (stat(path, &file_stat) != -1) { + if (time_of_compilation > MAX(file_stat.st_mtime, file_stat.st_ctime)) { + fi.mtime = file_stat.st_mtime; + fi.ctime = file_stat.st_ctime; + } + else { + fi.mtime = -1; + fi.ctime = -1; + } + fi.size = file_stat.st_size; } else { fi.mtime = -1; fi.ctime = -1; + fi.size = file_hash->hashed_content_size; } uint32_t *fi_index = hashtable_search(mf_file_infos, &fi); @@ -566,7 +580,7 @@ add_object_entry(struct manifest *mf, obj->file_info_indexes = x_malloc(n_fii * sizeof(*obj->file_info_indexes)); add_file_info_indexes(obj->file_info_indexes, n_fii, mf, included_files); memcpy(obj->hash.hash, object_hash->hash, mf->hash_size); - obj->hash.size = object_hash->size; + obj->hash.hashed_content_size = object_hash->hashed_content_size; } // Try to get the object hash from a manifest file. Caller frees. 
Returns NULL @@ -767,7 +781,8 @@ manifest_dump(const char *manifest_path, FILE *stream) hash = format_hash_as_string(mf->file_infos[i].hash, -1); fprintf(stream, " Hash: %s\n", hash); free(hash); - fprintf(stream, " Size: %u\n", mf->file_infos[i].size); + fprintf(stream, " File size: %"PRIu64"\n", mf->file_infos[i].size); + fprintf(stream, " Hashed bytes: %u\n", mf->file_infos[i].hashed_content_size); fprintf(stream, " Mtime: %lld\n", (long long)mf->file_infos[i].mtime); fprintf(stream, " Ctime: %lld\n", (long long)mf->file_infos[i].ctime); } @@ -783,7 +798,7 @@ manifest_dump(const char *manifest_path, FILE *stream) hash = format_hash_as_string(mf->objects[i].hash.hash, -1); fprintf(stream, " Hash: %s\n", hash); free(hash); - fprintf(stream, " Size: %u\n", (unsigned)mf->objects[i].hash.size); + fprintf(stream, " Size: %u\n", (unsigned)mf->objects[i].hash.hashed_content_size); } ret = true; diff --git a/src/manifest.h b/src/manifest.h index e116c349..98022d1a 100644 --- a/src/manifest.h +++ b/src/manifest.h @@ -5,7 +5,7 @@ #include "hashutil.h" #include "hashtable.h" -#define MANIFEST_VERSION 1 +#define MANIFEST_VERSION 2 struct file_hash *manifest_get(struct conf *conf, const char *manifest_path); bool manifest_put(const char *manifest_path, struct file_hash *object_hash, diff --git a/test/suites/depend.bash b/test/suites/depend.bash index a13f8426..97b7858e 100644 --- a/test/suites/depend.bash +++ b/test/suites/depend.bash @@ -294,6 +294,5 @@ EOF expect_stat 'files in cache' 9 # ------------------------------------------------------------------------- - # TODO: Add more test cases (see direct.bash for inspiration) } diff --git a/test/suites/direct.bash b/test/suites/direct.bash index 13730443..c73212fb 100644 --- a/test/suites/direct.bash +++ b/test/suites/direct.bash @@ -758,6 +758,30 @@ EOF expect_stat 'cache hit (preprocessed)' 1 expect_stat 'cache miss' 1 + # ------------------------------------------------------------------------- + TEST "__DATE__ in header 
file results in direct cache hit as the date remains the same" + + cat <<EOF >test_date2.c +// test_date2.c +#include "test_date2.h" +char date_str[] = MACRO_STRING; +EOF + cat <<EOF >test_date2.h +#define MACRO_STRING __DATE__ +EOF + + backdate test_date2.c test_date2.h + + $CCACHE_COMPILE -MP -MMD -MF test_date2.d -c test_date2.c + expect_stat 'cache hit (direct)' 0 + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 1 + + $CCACHE_COMPILE -MP -MMD -MF test_date2.d -c test_date2.c + expect_stat 'cache hit (direct)' 1 + expect_stat 'cache hit (preprocessed)' 0 + expect_stat 'cache miss' 1 + # ------------------------------------------------------------------------- TEST "New include file ignored if sloppy" -- cgit v1.2.1