From 4c1a92a0d8be2d86a5a9e0c4dbe5a84718d57b3a Mon Sep 17 00:00:00 2001 From: Joel Rosdahl Date: Sun, 7 Oct 2018 20:40:55 +0200 Subject: Refactor debug mode code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Knowledge of the debug mode snuck into the mdfour struct and code, which did not feel good. * Introduced a new “struct hash”, which is used by the hash_* functions instead of “struct mdfour”. “struct hash” contains the mdfour state and also debug file handles that can be set via a new hash_enable_debug function. * Removed “identifier” from the mdfour struct and code. * Introduced hash.h and all documented hash_* functions. * Introduced unify.h. * Removed some global state from util.c. --- dev.mk.in | 2 + doc/MANUAL.adoc | 4 +- src/ccache.c | 163 +++++++++++++++++++++++------------------ src/ccache.h | 41 +++++------ src/hash.c | 186 +++++++++++++++++++++-------------------------- src/hash.h | 100 +++++++++++++++++++++++++ src/hashutil.c | 10 +-- src/hashutil.h | 26 +++++-- src/manifest.c | 12 +-- src/mdfour.c | 7 +- src/mdfour.h | 2 - src/unify.c | 9 ++- src/unify.h | 24 ++++++ unittest/test_hash.c | 57 +++++++++------ unittest/test_hashutil.c | 109 ++++++++++++++++----------- 15 files changed, 460 insertions(+), 292 deletions(-) create mode 100644 src/hash.h create mode 100644 src/unify.h diff --git a/dev.mk.in b/dev.mk.in index 9e260cd2..4ca34a46 100644 --- a/dev.mk.in +++ b/dev.mk.in @@ -40,6 +40,7 @@ headers = \ src/conf.h \ src/counters.h \ src/getopt_long.h \ + src/hash.h \ src/hashtable.h \ src/hashtable_itr.h \ src/hashtable_private.h \ @@ -50,6 +51,7 @@ headers = \ src/mdfour.h \ src/murmurhashneutral2.h \ src/system.h \ + src/unify.h \ unittest/framework.h \ unittest/util.h generated_headers = \ diff --git a/doc/MANUAL.adoc b/doc/MANUAL.adoc index c360e57a..9ea08054 100644 --- a/doc/MANUAL.adoc +++ b/doc/MANUAL.adoc @@ -871,8 +871,8 @@ debug mode via the configuration setting *debug* or by setting 
*CCACHE_DEBUG* in the environment. This can be useful if you are investigating why you don't get cache hits. Note that performance will be reduced slightly. -When the debug mode is enabled, ccache will create five additional files next -to the object file: +When the debug mode is enabled, ccache will create up to five additional files +next to the object file: [options="header",cols="30%,70%"] |============================================================================== diff --git a/src/ccache.c b/src/ccache.c index cd74f116..e7790040 100644 --- a/src/ccache.c +++ b/src/ccache.c @@ -24,11 +24,13 @@ #else #include "getopt_long.h" #endif +#include "hash.h" #include "hashtable.h" #include "hashtable_itr.h" #include "hashutil.h" #include "language.h" #include "manifest.h" +#include "unify.h" #define STRINGIFY(x) #x #define TO_STRING(x) STRINGIFY(x) @@ -488,24 +490,38 @@ clean_up_internal_tempdir(void) } static void -debug_start(const char *path) +fclose_exitfn(void *context) { - char *hash_bin = format("%s%s", path, ".ccache-input-X"); - char *hash_txt = format("%s%s", path, ".ccache-input-text"); - hash_debug_init(hash_bin, hash_txt); - free(hash_bin); - free(hash_txt); + fclose((FILE *)context); } static void -debug_end() +dump_log_buffer_exitfn(void *context) { - hash_debug_end(); - char *path = format("%s%s", output_obj, ".ccache-log"); + if (!conf->debug) { + return; + } + + char *path = format("%s.ccache-log", (const char *)context); cc_dump_log_buffer(path); free(path); } +static void +init_hash_debug(struct hash *hash, const char *obj_path, char type, + const char *section_name, FILE *debug_text_file) +{ + if (!conf->debug) { + return; + } + + char *path = format("%s.ccache-input-%c", obj_path, type); + FILE *debug_binary_file = fopen(path, "wb"); + hash_enable_debug(hash, section_name, debug_binary_file, debug_text_file); + free(path); + exitfn_add(fclose_exitfn, debug_binary_file); +} + static enum guessed_compiler guess_compiler(const char *path) { @@ 
-564,27 +580,29 @@ get_path_in_cache(const char *name, const char *suffix) // global included_files variable. If the include file is a PCH, cpp_hash is // also updated. Takes over ownership of path. static void -remember_include_file(char *path, struct mdfour *cpp_hash, bool system) +remember_include_file(char *path, struct hash *cpp_hash, bool system) { + struct hash *fhash = NULL; + size_t path_len = strlen(path); if (path_len >= 2 && (path[0] == '<' && path[path_len - 1] == '>')) { // Typically <built-in> or <command-line>. - goto ignore; + goto out; } if (str_eq(path, input_file)) { // Don't remember the input file. - goto ignore; + goto out; } if (system && (conf->sloppiness & SLOPPY_NO_SYSTEM_HEADERS)) { // Don't remember this system header. - goto ignore; + goto out; } if (hashtable_search(included_files, path)) { // Already known include file. - goto ignore; + goto out; } #ifdef _WIN32 @@ -592,7 +610,7 @@ remember_include_file(char *path, struct mdfour *cpp_hash, bool system) DWORD attributes = GetFileAttributes(path); if (attributes != INVALID_FILE_ATTRIBUTES && attributes & FILE_ATTRIBUTE_DIRECTORY) { - goto ignore; + goto out; } #endif @@ -602,7 +620,7 @@ remember_include_file(char *path, struct mdfour *cpp_hash, bool system) } if (S_ISDIR(st.st_mode)) { // Ignore directory, typically $PWD. - goto ignore; + goto out; } if (!S_ISREG(st.st_mode)) { // Device, pipe, socket or other strange creature. @@ -628,7 +646,7 @@ remember_include_file(char *path, struct mdfour *cpp_hash, bool system) && (ignore[ignore_len-1] == DIR_DELIM_CH || canonical[ignore_len] == DIR_DELIM_CH || canonical[ignore_len] == '\0')) { - goto ignore; + goto out; } } @@ -649,8 +667,7 @@ remember_include_file(char *path, struct mdfour *cpp_hash, bool system) } // Let's hash the include file content. 
- struct mdfour fhash; - hash_start(&fhash); + fhash = hash_init(); bool is_pch = is_precompiled_header(path); if (is_pch) { @@ -669,14 +686,13 @@ remember_include_file(char *path, struct mdfour *cpp_hash, bool system) free(pch_sum_path); } - if (!hash_file(&fhash, path)) { + if (!hash_file(fhash, path)) { goto failure; } - struct file_hash pch_hash; - hash_result_as_bytes(&fhash, pch_hash.hash); - pch_hash.size = fhash.totalN; hash_delimiter(cpp_hash, using_pch_sum ? "pch_sum_hash" : "pch_hash"); - hash_buffer(cpp_hash, pch_hash.hash, sizeof(pch_hash.hash)); + char *pch_hash_result = hash_result(fhash); + hash_string(cpp_hash, pch_hash_result); + free(pch_hash_result); } if (conf->direct_mode) { @@ -692,7 +708,7 @@ remember_include_file(char *path, struct mdfour *cpp_hash, bool system) size = 0; } - int result = hash_source_code_string(conf, &fhash, source, size, path); + int result = hash_source_code_string(conf, fhash, source, size, path); free(source); if (result & HASH_SOURCE_CODE_ERROR || result & HASH_SOURCE_CODE_FOUND_TIME) { @@ -701,14 +717,13 @@ remember_include_file(char *path, struct mdfour *cpp_hash, bool system) } struct file_hash *h = x_malloc(sizeof(*h)); - hash_result_as_bytes(&fhash, h->hash); - h->size = fhash.totalN; + hash_result_as_bytes(fhash, h->hash); + h->size = hash_input_size(fhash); hashtable_insert(included_files, path, h); - } else { - free(path); + path = NULL; // Ownership transferred to included_files. } - return; + goto out; failure: if (conf->direct_mode) { @@ -716,7 +731,8 @@ failure: conf->direct_mode = false; } // Fall through. -ignore: +out: + hash_free(fhash); free(path); } @@ -784,7 +800,7 @@ make_relative_path(char *path) // - Stores the paths and hashes of included files in the global variable // included_files. 
static bool -process_preprocessed_file(struct mdfour *hash, const char *path, bool pump) +process_preprocessed_file(struct hash *hash, const char *path, bool pump) { char *data; size_t size; @@ -856,7 +872,7 @@ process_preprocessed_file(struct mdfour *hash, const char *path, bool pump) if (str_startswith(q, "# 31 \"\"\n")) { // Bogus extra line with #31, after the regular #1: Ignore the whole // line, and continue parsing. - hash_buffer(hash, p, q - p); + hash_string_buffer(hash, p, q - p); while (q < end && *q != '\n') { q++; } @@ -866,7 +882,7 @@ process_preprocessed_file(struct mdfour *hash, const char *path, bool pump) } else if (str_startswith(q, "# 32 \"\" 2\n")) { // Bogus wrong line with #32, instead of regular #1: Replace the line // number with the usual one. - hash_buffer(hash, p, q - p); + hash_string_buffer(hash, p, q - p); q += 1; q[0] = '#'; q[1] = ' '; @@ -890,7 +906,7 @@ process_preprocessed_file(struct mdfour *hash, const char *path, bool pump) return false; } // q points to the beginning of an include file path - hash_buffer(hash, p, q - p); + hash_string_buffer(hash, p, q - p); p = q; while (q < end && *q != '"') { q++; @@ -925,7 +941,7 @@ process_preprocessed_file(struct mdfour *hash, const char *path, bool pump) } } if (should_hash_inc_path) { - hash_buffer(hash, inc_path, strlen(inc_path)); + hash_string_buffer(hash, inc_path, strlen(inc_path)); } remember_include_file(inc_path, hash, system); @@ -957,7 +973,7 @@ process_preprocessed_file(struct mdfour *hash, const char *path, bool pump) } } - hash_buffer(hash, p, (end - p)); + hash_string_buffer(hash, p, (end - p)); free(data); free(cwd); @@ -1401,7 +1417,7 @@ to_cache(struct args *args) // Find the object file name by running the compiler in preprocessor mode. // Returns the hash as a heap-allocated hex string. 
static struct file_hash * -get_object_name_from_cpp(struct args *args, struct mdfour *hash) +get_object_name_from_cpp(struct args *args, struct hash *hash) { time_of_compilation = time(NULL); @@ -1504,7 +1520,7 @@ get_object_name_from_cpp(struct args *args, struct mdfour *hash) struct file_hash *result = x_malloc(sizeof(*result)); hash_result_as_bytes(hash, result->hash); - result->size = hash->totalN; + result->size = hash_input_size(hash); return result; } @@ -1528,7 +1544,7 @@ update_cached_result_globals(struct file_hash *hash) // Hash mtime or content of a file, or the output of a command, according to // the CCACHE_COMPILERCHECK setting. static void -hash_compiler(struct mdfour *hash, struct stat *st, const char *path, +hash_compiler(struct hash *hash, struct stat *st, const char *path, bool allow_command) { if (str_eq(conf->compiler_check, "none")) { @@ -1558,7 +1574,7 @@ hash_compiler(struct mdfour *hash, struct stat *st, const char *path, // with -ccbin/--compiler-bindir. If they are NULL, the compilers are looked up // in PATH instead. static void -hash_nvcc_host_compiler(struct mdfour *hash, struct stat *ccbin_st, +hash_nvcc_host_compiler(struct hash *hash, struct stat *ccbin_st, const char *ccbin) { // From : @@ -1607,7 +1623,7 @@ hash_nvcc_host_compiler(struct mdfour *hash, struct stat *ccbin_st, // Update a hash sum with information common for the direct and preprocessor // modes. static void -calculate_common_hash(struct args *args, struct mdfour *hash) +calculate_common_hash(struct args *args, struct hash *hash) { hash_string(hash, HASH_PREFIX); @@ -1733,7 +1749,7 @@ calculate_common_hash(struct args *args, struct mdfour *hash) // modes and calculate the object hash. Returns the object hash on success, // otherwise NULL. Caller frees. 
static struct file_hash * -calculate_object_hash(struct args *args, struct mdfour *hash, int direct_mode) +calculate_object_hash(struct args *args, struct hash *hash, int direct_mode) { bool found_ccbin = false; @@ -1793,17 +1809,17 @@ calculate_object_hash(struct args *args, struct mdfour *hash, int direct_mode) if (str_startswith(args->argv[i], "-Wp,")) { if (str_startswith(args->argv[i], "-Wp,-MD,") && !strchr(args->argv[i] + 8, ',')) { - hash_string_length(hash, args->argv[i], 8); + hash_string_buffer(hash, args->argv[i], 8); continue; } else if (str_startswith(args->argv[i], "-Wp,-MMD,") && !strchr(args->argv[i] + 9, ',')) { - hash_string_length(hash, args->argv[i], 9); + hash_string_buffer(hash, args->argv[i], 9); continue; } } else if (str_startswith(args->argv[i], "-MF")) { // In either case, hash the "-MF" part. hash_delimiter(hash, "arg"); - hash_string_length(hash, args->argv[i], 3); + hash_string_buffer(hash, args->argv[i], 3); bool separate_argument = (strlen(args->argv[i]) == 3); if (separate_argument) { @@ -3411,27 +3427,32 @@ ccache(int argc, char *argv[]) cc_log("Object file: %s", output_obj); + exitfn_add(dump_log_buffer_exitfn, output_obj); + + FILE *debug_text_file = NULL; if (conf->debug) { - debug_start(output_obj); - exitfn_add_nullary(debug_end); + char *path = format("%s.ccache-input-text", output_obj); + debug_text_file = fopen(path, "w"); + free(path); + exitfn_add(fclose_exitfn, debug_text_file); } - struct mdfour common_hash; - hash_start(&common_hash); - mdfour_identify(&common_hash, 'c'); - hash_section(&common_hash, "COMMON"); - calculate_common_hash(preprocessor_args, &common_hash); + struct hash *common_hash = hash_init(); + init_hash_debug(common_hash, output_obj, 'c', "COMMON", debug_text_file); + + calculate_common_hash(preprocessor_args, common_hash); // Try to find the hash using the manifest. 
- struct mdfour direct_hash = common_hash; - mdfour_identify(&direct_hash, 'd'); - hash_section(&direct_hash, "DIRECT MODE"); + struct hash *direct_hash = hash_copy(common_hash); + init_hash_debug( + direct_hash, output_obj, 'd', "DIRECT MODE", debug_text_file); + bool put_object_in_manifest = false; struct file_hash *object_hash = NULL; struct file_hash *object_hash_from_manifest = NULL; if (conf->direct_mode) { cc_log("Trying direct lookup"); - object_hash = calculate_object_hash(preprocessor_args, &direct_hash, 1); + object_hash = calculate_object_hash(preprocessor_args, direct_hash, 1); if (object_hash) { update_cached_result_globals(object_hash); @@ -3455,10 +3476,11 @@ ccache(int argc, char *argv[]) } // Find the hash using the preprocessed output. Also updates included_files. - struct mdfour cpp_hash = common_hash; - mdfour_identify(&cpp_hash, 'p'); - hash_section(&cpp_hash, "PREPROCESSOR MODE"); - object_hash = calculate_object_hash(preprocessor_args, &cpp_hash, 0); + struct hash *cpp_hash = hash_copy(common_hash); + init_hash_debug( + cpp_hash, output_obj, 'p', "PREPROCESSOR MODE", debug_text_file); + + object_hash = calculate_object_hash(preprocessor_args, cpp_hash, 0); if (!object_hash) { fatal("internal error: object hash from cpp returned NULL"); } @@ -3532,8 +3554,6 @@ ccache_main_options(int argc, char *argv[]) {"zero-stats", no_argument, 0, 'z'}, {0, 0, 0, 0} }; - struct mdfour md; - char *s; int c; while ((c = getopt_long(argc, argv, "cChF:M:o:psVz", options, NULL)) != -1) { @@ -3543,17 +3563,20 @@ ccache_main_options(int argc, char *argv[]) break; case HASH_FILE: + { initialize(); - hash_start(&md); + struct hash *hash = hash_init(); if (str_eq(optarg, "-")) { - hash_fd(&md, STDIN_FILENO); + hash_fd(hash, STDIN_FILENO); } else { - hash_file(&md, optarg); + hash_file(hash, optarg); } - s = hash_result(&md); - puts(s); - free(s); + char *result = hash_result(hash); + puts(result); + free(result); + hash_free(hash); break; + } case 'c': // --cleanup 
initialize(); @@ -3602,7 +3625,7 @@ ccache_main_options(int argc, char *argv[]) if (size == 0) { printf("Unset cache size limit\n"); } else { - s = format_human_readable_size(size); + char *s = format_human_readable_size(size); printf("Set cache size limit to %s\n", s); free(s); } diff --git a/src/ccache.h b/src/ccache.h index b52f1b78..bb0a4292 100644 --- a/src/ccache.h +++ b/src/ccache.h @@ -1,8 +1,24 @@ +// Copyright (C) 2002-2007 Andrew Tridgell +// Copyright (C) 2009-2018 Joel Rosdahl +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. 
+// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + #ifndef CCACHE_H #define CCACHE_H #include "system.h" -#include "mdfour.h" #include "conf.h" #include "counters.h" @@ -118,24 +134,6 @@ void args_remove_first(struct args *args); char *args_to_string(struct args *args); bool args_equal(struct args *args1, struct args *args2); -// ---------------------------------------------------------------------------- -// hash.c - -void hash_debug_init(const char *bin, const char *txt); -void hash_debug_end(void); -void hash_start(struct mdfour *md); -void hash_section(struct mdfour *md, const char *name); -void hash_buffer(struct mdfour *md, const void *s, size_t len); -char *hash_result(struct mdfour *md); -void hash_result_as_bytes(struct mdfour *md, unsigned char *out); -bool hash_equal(struct mdfour *md1, struct mdfour *md2); -void hash_delimiter(struct mdfour *md, const char *type); -void hash_string(struct mdfour *md, const char *s); -void hash_string_length(struct mdfour *md, const char *s, int length); -void hash_int(struct mdfour *md, int x); -bool hash_fd(struct mdfour *md, int fd); -bool hash_file(struct mdfour *md, const char *fname); - // ---------------------------------------------------------------------------- // util.c @@ -224,11 +222,6 @@ void stats_timestamp(time_t time, struct counters *counters); void stats_read(const char *path, struct counters *counters); void stats_write(const char *path, struct counters *counters); -// ---------------------------------------------------------------------------- -// unify.c - -int unify_hash(struct mdfour *hash, const char *fname, bool print); - // ---------------------------------------------------------------------------- // exitfn.c diff --git a/src/hash.c b/src/hash.c index 0205143f..3e554cd5 100644 --- a/src/hash.c +++ b/src/hash.c @@ -15,168 
+15,150 @@ // this program; if not, write to the Free Software Foundation, Inc., 51 // Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +#include + #include "ccache.h" +#include "hash.h" +#include "mdfour.h" +#include "system.h" #define HASH_DELIMITER "\000cCaChE" -#define HASH_DEBUG_DELIMITER "### " - -// binary input, for hashing -static char *debug_hash_bin; - -// text input, for debugging -static char *debug_hash_txt; -// char mapping to open files -static FILE **debug_hash_file; +struct hash { + struct mdfour md; + FILE *debug_binary; + FILE *debug_text; +}; -void hash_debug_init(const char *bin, const char *txt) +static void +do_hash_buffer(struct hash *hash, const void *s, size_t len) { - debug_hash_file = x_calloc(256, sizeof(FILE *)); - static char *hash_types = "cdp"; // common, direct, cpp - if (bin) { - debug_hash_bin = x_strdup(bin); - assert(debug_hash_bin[strlen(debug_hash_bin)-1] == 'X'); - for (char *p = hash_types; *p != '\0'; p++) { - debug_hash_bin[strlen(debug_hash_bin)-1] = *p; - debug_hash_file[(int) *p] = fopen(debug_hash_bin, "wb"); - } - debug_hash_bin[strlen(debug_hash_bin)-1] = 'X'; - } - if (txt) { - debug_hash_txt = x_strdup(txt); - debug_hash_file[(int) 't'] = fopen(debug_hash_txt, "w"); + mdfour_update(&hash->md, (const unsigned char *)s, len); + if (len > 0 && hash->debug_binary) { + fwrite(s, 1, len, hash->debug_binary); } } -void hash_debug_end() +static void +do_debug_text(struct hash *hash, const void *s, size_t len) { - for (int i = 0; i < 256; i++) { - if (debug_hash_file[i] != NULL) { - fclose(debug_hash_file[i]); - } + if (len > 0 && hash->debug_text) { + fwrite(s, 1, len, hash->debug_text); } } -static void -hash_binary_buffer(struct mdfour *md, const void *s, size_t len) +struct hash * +hash_init(void) { - mdfour_update(md, (const unsigned char *)s, len); - if (!md->identifier || len == 0) { - return; - } - if (debug_hash_bin) { - // log to different files, for the different hash types - fwrite(s, 1, len, 
debug_hash_file[md->identifier]); - } + struct hash *hash = malloc(sizeof(struct hash)); + mdfour_begin(&hash->md); + hash->debug_binary = NULL; + hash->debug_text = NULL; + return hash; } -static void -hash_debug_buffer(struct mdfour *md, const void *s, size_t len) +struct hash * +hash_copy(struct hash *hash) { - if (!md->identifier || len == 0) { - return; - } - if (debug_hash_txt) { - fwrite(s, 1, len, debug_hash_file['t']); - } + struct hash *result = malloc(sizeof(struct hash)); + result->md = hash->md; + result->debug_binary = NULL; + result->debug_text = NULL; + return result; } -void -hash_start(struct mdfour *md) +void hash_free(struct hash *hash) { - mdfour_begin(md); + free(hash); } -void -hash_section(struct mdfour *md, const char *name) +void hash_enable_debug( + struct hash *hash, const char *section_name, + FILE *debug_binary, FILE *debug_text) { - hash_debug_buffer(md, "=== ", 4); - hash_debug_buffer(md, name, strlen(name)); - hash_debug_buffer(md, " ===", 4); - hash_debug_buffer(md, "\n", 1); + hash->debug_binary = debug_binary; + hash->debug_text = debug_text; + + do_debug_text(hash, "=== ", 4); + do_debug_text(hash, section_name, strlen(section_name)); + do_debug_text(hash, " ===\n", 5); +} + +size_t +hash_input_size(struct hash *hash) +{ + return hash->md.totalN; } void -hash_buffer(struct mdfour *md, const void *s, size_t len) +hash_buffer(struct hash *hash, const void *s, size_t len) { - hash_binary_buffer(md, s, len); - hash_debug_buffer(md, s, len); + do_hash_buffer(hash, s, len); + do_debug_text(hash, s, len); } -// Return the hash result as a hex string. Caller frees. char * -hash_result(struct mdfour *md) +hash_result(struct hash *hash) { unsigned char sum[16]; - hash_result_as_bytes(md, sum); - return format_hash_as_string(sum, (unsigned) md->totalN); + hash_result_as_bytes(hash, sum); + return format_hash_as_string(sum, (unsigned) hash->md.totalN); } -// Return the hash result as 16 binary bytes. 
void -hash_result_as_bytes(struct mdfour *md, unsigned char *out) +hash_result_as_bytes(struct hash *hash, unsigned char *out) { - mdfour_update(md, NULL, 0); - mdfour_result(md, out); + mdfour_update(&hash->md, NULL, 0); + mdfour_result(&hash->md, out); } bool -hash_equal(struct mdfour *md1, struct mdfour *md2) +hash_equal(struct hash *hash1, struct hash *hash2) { unsigned char sum1[16]; - hash_result_as_bytes(md1, sum1); + hash_result_as_bytes(hash1, sum1); unsigned char sum2[16]; - hash_result_as_bytes(md2, sum2); + hash_result_as_bytes(hash2, sum2); return memcmp(sum1, sum2, sizeof(sum1)) == 0; } -// Hash some data that is unlikely to occur in the input. The idea is twofold: -// -// - Delimit things like arguments from each other (e.g., so that -I -O2 and -// -I-O2 hash differently). -// - Tag different types of hashed information so that it's possible to do -// conditional hashing of information in a safe way (e.g., if we want to hash -// information X if CCACHE_A is set and information Y if CCACHE_B is set, -// there should never be a hash collision risk). void -hash_delimiter(struct mdfour *md, const char *type) +hash_delimiter(struct hash *hash, const char *type) { - hash_binary_buffer(md, HASH_DELIMITER, sizeof(HASH_DELIMITER)); - hash_binary_buffer(md, type, strlen(type) + 1); // Include NUL. - hash_debug_buffer(md, HASH_DEBUG_DELIMITER, strlen(HASH_DEBUG_DELIMITER)); - hash_debug_buffer(md, type, strlen(type)); - hash_debug_buffer(md, "\n", 1); + do_hash_buffer(hash, HASH_DELIMITER, sizeof(HASH_DELIMITER)); + do_hash_buffer(hash, type, strlen(type) + 1); // Include NUL. 
+ do_debug_text(hash, "### ", 4); + do_debug_text(hash, type, strlen(type)); + do_debug_text(hash, "\n", 1); } void -hash_string(struct mdfour *md, const char *s) +hash_string(struct hash *hash, const char *s) { - hash_string_length(md, s, strlen(s)); + hash_string_buffer(hash, s, strlen(s)); } void -hash_string_length(struct mdfour *md, const char *s, int length) +hash_string_buffer(struct hash *hash, const char *s, int length) { - hash_binary_buffer(md, s, length); - hash_debug_buffer(md, s, length); - hash_debug_buffer(md, "\n", 1); + hash_buffer(hash, s, length); + do_debug_text(hash, "\n", 1); } void -hash_int(struct mdfour *md, int x) +hash_int(struct hash *hash, int x) { - hash_binary_buffer(md, (char *)&x, sizeof(x)); + do_hash_buffer(hash, (char *)&x, sizeof(x)); + char buf[16]; snprintf(buf, sizeof(buf), "%d", x); - hash_debug_buffer(md, buf, strlen(buf)); - hash_debug_buffer(md, "\n", 1); + do_debug_text(hash, buf, strlen(buf)); + do_debug_text(hash, "\n", 1); } -// Add contents of an open file to the hash. Returns true on success, otherwise -// false. bool -hash_fd(struct mdfour *md, int fd) +hash_fd(struct hash *hash, int fd) { char buf[READ_BUFFER_SIZE]; ssize_t n; @@ -186,17 +168,15 @@ hash_fd(struct mdfour *md, int fd) break; } if (n > 0) { - hash_binary_buffer(md, buf, n); - hash_debug_buffer(md, buf, n); + do_hash_buffer(hash, buf, n); + do_debug_text(hash, buf, n); } } return n == 0; } -// Add contents of a file to the hash. Returns true on success, otherwise -// false. 
bool -hash_file(struct mdfour *md, const char *fname) +hash_file(struct hash *hash, const char *fname) { int fd = open(fname, O_RDONLY|O_BINARY); if (fd == -1) { @@ -204,7 +184,7 @@ hash_file(struct mdfour *md, const char *fname) return false; } - bool ret = hash_fd(md, fd); + bool ret = hash_fd(hash, fd); close(fd); return ret; } diff --git a/src/hash.h b/src/hash.h new file mode 100644 index 00000000..06a698a0 --- /dev/null +++ b/src/hash.h @@ -0,0 +1,100 @@ +// Copyright (C) 2018 Joel Rosdahl +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#ifndef HASH_H +#define HASH_H + +#include "system.h" + +struct hash; + +// Create a new hash. +struct hash *hash_init(void); + +// Create a new hash from an existing hash state. +struct hash *hash_copy(struct hash *hash); + +// Free a hash created by hash_init or hash_copy. +void hash_free(struct hash *hash); + +// Enable debug logging of hashed input to a binary and a text file. +void hash_enable_debug( + struct hash *hash, const char *section_name, FILE *debug_binary, + FILE *debug_text); + +// Return how many bytes have been hashed. +size_t hash_input_size(struct hash *hash); + +// Return the hash result as a hex string. Caller frees. +char *hash_result(struct hash *hash); + +// Return the hash result as 16 binary bytes. 
+void hash_result_as_bytes(struct hash *hash, unsigned char *out); + +// Return whether hash1 and hash2 are equal. +bool hash_equal(struct hash *hash1, struct hash *hash2); + +// Hash some data that is unlikely to occur in the input. The idea is twofold: +// +// - Delimit things like arguments from each other (e.g., so that -I -O2 and +// -I-O2 hash differently). +// - Tag different types of hashed information so that it's possible to do +// conditional hashing of information in a safe way (e.g., if we want to hash +// information X if CCACHE_A is set and information Y if CCACHE_B is set, +// there should never be a hash collision risk). +void hash_delimiter(struct hash *hash, const char *type); + +// Hash bytes in a buffer. +// +// If hash debugging is enabled, the bytes are written verbatim to the text +// input file. +void hash_buffer(struct hash *hash, const void *s, size_t len); + +// Hash a string. +// +// If hash debugging is enabled, the string is written to the text input file +// followed by a newline. +void hash_string(struct hash *hash, const char *s); + +// Hash a string with a known size. +// +// If hash debugging is enabled, the string is written to the text input file +// followed by a newline. +void hash_string_buffer(struct hash *hash, const char *s, int length); + +// Hash an integer. +// +// If hash debugging is enabled, the integer is written in text form to the +// text input file followed by a newline. +void hash_int(struct hash *hash, int x); + +// Add contents of an open file to the hash. +// +// If hash debugging is enabled, the data is written verbatim to the text input +// file. +// +// Returns true on success, otherwise false. +bool hash_fd(struct hash *hash, int fd); + +// Add contents of a file to the hash. +// +// If hash debugging is enabled, the data is written verbatim to the text input +// file. +// +// Returns true on success, otherwise false. 
+bool hash_file(struct hash *hash, const char *fname); + +#endif diff --git a/src/hashutil.c b/src/hashutil.c index c543d4d1..4ed10d77 100644 --- a/src/hashutil.c +++ b/src/hashutil.c @@ -89,7 +89,7 @@ check_for_temporal_macros(const char *str, size_t len) // Hash a string. Returns a bitmask of HASH_SOURCE_CODE_* results. int hash_source_code_string( - struct conf *conf, struct mdfour *hash, const char *str, size_t len, + struct conf *conf, struct hash *hash, const char *str, size_t len, const char *path) { int result = HASH_SOURCE_CODE_OK; @@ -101,7 +101,7 @@ hash_source_code_string( } // Hash the source string. - hash_string_length(hash, str, len); + hash_string_buffer(hash, str, len); if (result & HASH_SOURCE_CODE_FOUND_DATE) { // Make sure that the hash sum changes if the (potential) expansion of @@ -130,7 +130,7 @@ hash_source_code_string( // Hash a file ignoring comments. Returns a bitmask of HASH_SOURCE_CODE_* // results. int -hash_source_code_file(struct conf *conf, struct mdfour *hash, const char *path) +hash_source_code_file(struct conf *conf, struct hash *hash, const char *path) { if (is_precompiled_header(path)) { if (hash_file(hash, path)) { @@ -151,7 +151,7 @@ hash_source_code_file(struct conf *conf, struct mdfour *hash, const char *path) } bool -hash_command_output(struct mdfour *hash, const char *command, +hash_command_output(struct hash *hash, const char *command, const char *compiler) { #ifdef _WIN32 @@ -292,7 +292,7 @@ hash_command_output(struct mdfour *hash, const char *command, } bool -hash_multicommand_output(struct mdfour *hash, const char *commands, +hash_multicommand_output(struct hash *hash, const char *commands, const char *compiler) { char *command_string = x_strdup(commands); diff --git a/src/hashutil.h b/src/hashutil.h index ae9abf19..b6b916ef 100644 --- a/src/hashutil.h +++ b/src/hashutil.h @@ -1,8 +1,24 @@ +// Copyright (C) 2009-2018 Joel Rosdahl +// +// This program is free software; you can redistribute it and/or modify it +// 
under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + #ifndef HASHUTIL_H #define HASHUTIL_H #include "conf.h" -#include "mdfour.h" +#include "hash.h" #include struct file_hash @@ -23,13 +39,13 @@ int file_hashes_equal(struct file_hash *fh1, struct file_hash *fh2); int check_for_temporal_macros(const char *str, size_t len); int hash_source_code_string( - struct conf *conf, struct mdfour *hash, const char *str, size_t len, + struct conf *conf, struct hash *hash, const char *str, size_t len, const char *path); int hash_source_code_file( - struct conf *conf, struct mdfour *hash, const char *path); -bool hash_command_output(struct mdfour *hash, const char *command, + struct conf *conf, struct hash *hash, const char *path); +bool hash_command_output(struct hash *hash, const char *command, const char *compiler); -bool hash_multicommand_output(struct mdfour *hash, const char *command, +bool hash_multicommand_output(struct hash *hash, const char *command, const char *compiler); #endif diff --git a/src/manifest.c b/src/manifest.c index b355564d..5e0bcac6 100644 --- a/src/manifest.c +++ b/src/manifest.c @@ -411,20 +411,22 @@ verify_object(struct conf *conf, struct manifest *mf, struct object *obj, struct file_hash *actual = hashtable_search(hashed_files, path); if (!actual) { - struct mdfour hash; - hash_start(&hash); - int result = hash_source_code_file(conf, &hash, path); + struct hash 
*hash = hash_init(); + int result = hash_source_code_file(conf, hash, path); if (result & HASH_SOURCE_CODE_ERROR) { cc_log("Failed hashing %s", path); + hash_free(hash); return 0; } if (result & HASH_SOURCE_CODE_FOUND_TIME) { + hash_free(hash); return 0; } actual = x_malloc(sizeof(*actual)); - hash_result_as_bytes(&hash, actual->hash); - actual->size = hash.totalN; + hash_result_as_bytes(hash, actual->hash); + actual->size = hash_input_size(hash); hashtable_insert(hashed_files, x_strdup(path), actual); + hash_free(hash); } if (memcmp(fi->hash, actual->hash, mf->hash_size) != 0 || fi->size != actual->size) { diff --git a/src/mdfour.c b/src/mdfour.c index 085d1862..11e0fba1 100644 --- a/src/mdfour.c +++ b/src/mdfour.c @@ -16,6 +16,7 @@ // Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "ccache.h" +#include "mdfour.h" // NOTE: This code makes no attempt to be fast! @@ -129,12 +130,6 @@ mdfour_begin(struct mdfour *md) md->totalN = 0; md->tail_len = 0; md->finalized = 0; - md->identifier = 0; -} - -void mdfour_identify(struct mdfour *md, int identifier) -{ - md->identifier = identifier; } static diff --git a/src/mdfour.h b/src/mdfour.h index 1970723a..c196a09e 100644 --- a/src/mdfour.h +++ b/src/mdfour.h @@ -10,11 +10,9 @@ struct mdfour { unsigned char tail[64]; size_t tail_len; int finalized; - int identifier; }; void mdfour_begin(struct mdfour *md); -void mdfour_identify(struct mdfour *md, int identifier); void mdfour_update(struct mdfour *md, const unsigned char *in, size_t n); void mdfour_result(struct mdfour *md, unsigned char *out); diff --git a/src/unify.c b/src/unify.c index 16df4683..8cb69fc7 100644 --- a/src/unify.c +++ b/src/unify.c @@ -1,5 +1,5 @@ // Copyright (C) 2002 Andrew Tridgell -// Copyright (C) 2009-2017 Joel Rosdahl +// Copyright (C) 2009-2018 Joel Rosdahl // // This program is free software; you can redistribute it and/or modify it // under the terms of the GNU General Public License as published by the Free @@ -28,6 +28,7 @@ // 
compiler (for example, inline assembly systems). #include "ccache.h" +#include "hash.h" static bool print_unified = true; @@ -102,7 +103,7 @@ build_table(void) // Buffer up characters before hashing them. static void -pushchar(struct mdfour *hash, unsigned char c) +pushchar(struct hash *hash, unsigned char c) { static unsigned char buf[64]; static size_t len; @@ -131,7 +132,7 @@ pushchar(struct mdfour *hash, unsigned char c) // Hash some C/C++ code after unifying. static void -unify(struct mdfour *hash, unsigned char *p, size_t size) +unify(struct hash *hash, unsigned char *p, size_t size) { build_table(); @@ -246,7 +247,7 @@ unify(struct mdfour *hash, unsigned char *p, size_t size) // Hash a file that consists of preprocessor output, but remove any line number // information from the hash. int -unify_hash(struct mdfour *hash, const char *fname, bool debug) +unify_hash(struct hash *hash, const char *fname, bool debug) { char *data; size_t size; diff --git a/src/unify.h b/src/unify.h new file mode 100644 index 00000000..eac167dc --- /dev/null +++ b/src/unify.h @@ -0,0 +1,24 @@ +// Copyright (C) 2018 Joel Rosdahl +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. 
+// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 51 +// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +#ifndef UNIFY_H +#define UNIFY_H + +#include "hash.h" + +int unify_hash(struct hash *hash, const char *fname, bool print); + +#endif diff --git a/unittest/test_hash.c b/unittest/test_hash.c index 823d8e13..5278560e 100644 --- a/unittest/test_hash.c +++ b/unittest/test_hash.c @@ -17,41 +17,54 @@ // This file contains tests for functions in hash.c. #include "../src/ccache.h" +#include "../src/hash.h" #include "framework.h" -TEST_SUITE(hash) +TEST_SUITE(mdfour) TEST(test_vectors_from_rfc_1320_should_be_correct) { - struct mdfour h; + { + struct hash *h = hash_init(); + hash_string(h, ""); + CHECK_STR_EQ_FREE2("31d6cfe0d16ae931b73c59d7e0c089c0-0", hash_result(h)); + hash_free(h); + } - hash_start(&h); - hash_string(&h, ""); - CHECK_STR_EQ_FREE2("31d6cfe0d16ae931b73c59d7e0c089c0-0", hash_result(&h)); + { + struct hash *h = hash_init(); + hash_string(h, "a"); + CHECK_STR_EQ_FREE2("bde52cb31de33e46245e05fbdbd6fb24-1", hash_result(h)); + hash_free(h); + } - hash_start(&h); - hash_string(&h, "a"); - CHECK_STR_EQ_FREE2("bde52cb31de33e46245e05fbdbd6fb24-1", hash_result(&h)); + { + struct hash *h = hash_init(); + hash_string(h, "message digest"); + CHECK_STR_EQ_FREE2("d9130a8164549fe818874806e1c7014b-14", hash_result(h)); + hash_free(h); + } - hash_start(&h); - hash_string(&h, "message digest"); - CHECK_STR_EQ_FREE2("d9130a8164549fe818874806e1c7014b-14", hash_result(&h)); - - hash_start(&h); - hash_string( - &h, - "12345678901234567890123456789012345678901234567890123456789012345678901234567890"); - CHECK_STR_EQ_FREE2("e33b4ddc9c38f2199c3e7b164fcc0536-80", hash_result(&h)); + { + struct hash *h = hash_init(); + hash_string( + h, + "12345678901234567890123456789012345678901234567890123456789012345678901" + "234567890"); + 
CHECK_STR_EQ_FREE2("e33b4ddc9c38f2199c3e7b164fcc0536-80", hash_result(h)); + hash_free(h); + } } TEST(hash_result_should_be_idempotent) { - struct mdfour h; + struct hash *h = hash_init(); + + hash_string(h, ""); + CHECK_STR_EQ_FREE2("31d6cfe0d16ae931b73c59d7e0c089c0-0", hash_result(h)); + CHECK_STR_EQ_FREE2("31d6cfe0d16ae931b73c59d7e0c089c0-0", hash_result(h)); - hash_start(&h); - hash_string(&h, ""); - CHECK_STR_EQ_FREE2("31d6cfe0d16ae931b73c59d7e0c089c0-0", hash_result(&h)); - CHECK_STR_EQ_FREE2("31d6cfe0d16ae931b73c59d7e0c089c0-0", hash_result(&h)); + hash_free(h); } TEST_SUITE_END diff --git a/unittest/test_hashutil.c b/unittest/test_hashutil.c index 5fd8f0ef..2b2dbc9b 100644 --- a/unittest/test_hashutil.c +++ b/unittest/test_hashutil.c @@ -25,86 +25,107 @@ TEST_SUITE(hashutil) TEST(hash_command_output_simple) { - struct mdfour h1, h2; - hash_start(&h1); - hash_start(&h2); - CHECK(hash_command_output(&h1, "echo", "not used")); - CHECK(hash_command_output(&h2, "echo", "not used")); - CHECK(hash_equal(&h1, &h2)); + struct hash *h1 = hash_init(); + struct hash *h2 = hash_init(); + + CHECK(hash_command_output(h1, "echo", "not used")); + CHECK(hash_command_output(h2, "echo", "not used")); + CHECK(hash_equal(h1, h2)); + + hash_free(h2); + hash_free(h1); } TEST(hash_command_output_space_removal) { - struct mdfour h1, h2; - hash_start(&h1); - hash_start(&h2); - CHECK(hash_command_output(&h1, "echo", "not used")); - CHECK(hash_command_output(&h2, " echo ", "not used")); - CHECK(hash_equal(&h1, &h2)); + struct hash *h1 = hash_init(); + struct hash *h2 = hash_init(); + + CHECK(hash_command_output(h1, "echo", "not used")); + CHECK(hash_command_output(h2, " echo ", "not used")); + CHECK(hash_equal(h1, h2)); + + hash_free(h2); + hash_free(h1); } TEST(hash_command_output_hash_inequality) { - struct mdfour h1, h2; - hash_start(&h1); - hash_start(&h2); - CHECK(hash_command_output(&h1, "echo foo", "not used")); - CHECK(hash_command_output(&h2, "echo bar", "not used")); - 
CHECK(!hash_equal(&h1, &h2)); + struct hash *h1 = hash_init(); + struct hash *h2 = hash_init(); + + CHECK(hash_command_output(h1, "echo foo", "not used")); + CHECK(hash_command_output(h2, "echo bar", "not used")); + CHECK(!hash_equal(h1, h2)); + + hash_free(h2); + hash_free(h1); } TEST(hash_command_output_compiler_substitution) { - struct mdfour h1, h2; - hash_start(&h1); - hash_start(&h2); - CHECK(hash_command_output(&h1, "echo foo", "not used")); - CHECK(hash_command_output(&h2, "%compiler% foo", "echo")); - CHECK(hash_equal(&h1, &h2)); + struct hash *h1 = hash_init(); + struct hash *h2 = hash_init(); + + CHECK(hash_command_output(h1, "echo foo", "not used")); + CHECK(hash_command_output(h2, "%compiler% foo", "echo")); + CHECK(hash_equal(h1, h2)); + + hash_free(h2); + hash_free(h1); } TEST(hash_command_output_stdout_versus_stderr) { - struct mdfour h1, h2; - hash_start(&h1); - hash_start(&h2); + struct hash *h1 = hash_init(); + struct hash *h2 = hash_init(); + #ifndef _WIN32 create_file("stderr.sh", "#!/bin/sh\necho foo >&2\n"); chmod("stderr.sh", 0555); - CHECK(hash_command_output(&h1, "echo foo", "not used")); - CHECK(hash_command_output(&h2, "./stderr.sh", "not used")); + CHECK(hash_command_output(h1, "echo foo", "not used")); + CHECK(hash_command_output(h2, "./stderr.sh", "not used")); #else create_file("stderr.bat", "@echo off\r\necho foo>&2\r\n"); - CHECK(hash_command_output(&h1, "echo foo", "not used")); - CHECK(hash_command_output(&h2, "stderr.bat", "not used")); + CHECK(hash_command_output(h1, "echo foo", "not used")); + CHECK(hash_command_output(h2, "stderr.bat", "not used")); #endif - CHECK(hash_equal(&h1, &h2)); + CHECK(hash_equal(h1, h2)); + + hash_free(h2); + hash_free(h1); } TEST(hash_multicommand_output) { - struct mdfour h1, h2; - hash_start(&h1); - hash_start(&h2); + struct hash *h1 = hash_init(); + struct hash *h2 = hash_init(); + #ifndef _WIN32 create_file("foo.sh", "#!/bin/sh\necho foo\necho bar\n"); chmod("foo.sh", 0555); - 
CHECK(hash_multicommand_output(&h2, "echo foo; echo bar", "not used")); - CHECK(hash_multicommand_output(&h1, "./foo.sh", "not used")); + CHECK(hash_multicommand_output(h2, "echo foo; echo bar", "not used")); + CHECK(hash_multicommand_output(h1, "./foo.sh", "not used")); #else create_file("foo.bat", "@echo off\r\necho foo\r\necho bar\r\n"); - CHECK(hash_multicommand_output(&h2, "echo foo; echo bar", "not used")); - CHECK(hash_multicommand_output(&h1, "foo.bat", "not used")); + CHECK(hash_multicommand_output(h2, "echo foo; echo bar", "not used")); + CHECK(hash_multicommand_output(h1, "foo.bat", "not used")); #endif - CHECK(hash_equal(&h1, &h2)); + CHECK(hash_equal(h1, h2)); + + hash_free(h2); + hash_free(h1); } TEST(hash_multicommand_output_error_handling) { - struct mdfour h1, h2; - hash_start(&h1); - hash_start(&h2); - CHECK(!hash_multicommand_output(&h2, "false; true", "not used")); + struct hash *h1 = hash_init(); + struct hash *h2 = hash_init(); + + CHECK(!hash_multicommand_output(h2, "false; true", "not used")); + + hash_free(h2); + hash_free(h1); } TEST(check_for_temporal_macros) -- cgit v1.2.1