diff options
Diffstat (limited to 'src/redis-check-aof.c')
-rw-r--r-- | src/redis-check-aof.c | 549 |
1 files changed, 414 insertions, 135 deletions
diff --git a/src/redis-check-aof.c b/src/redis-check-aof.c index 01f42ec1b..a3da79dd4 100644 --- a/src/redis-check-aof.c +++ b/src/redis-check-aof.c @@ -30,6 +30,24 @@ #include "server.h" #include <sys/stat.h> +#include <sys/types.h> +#include <regex.h> +#include <libgen.h> + +#define AOF_CHECK_OK 0 +#define AOF_CHECK_EMPTY 1 +#define AOF_CHECK_TRUNCATED 2 +#define AOF_CHECK_TIMESTAMP_TRUNCATED 3 + +typedef enum { + AOF_RESP, + AOF_RDB_PREAMBLE, + AOF_MULTI_PART, +} input_file_type; + +aofManifest *aofManifestCreate(void); +void aofManifestFree(aofManifest *am); +aofManifest *aofLoadManifestFromFile(sds am_filepath); #define ERROR(...) { \ char __buf[1024]; \ @@ -51,47 +69,6 @@ int consumeNewline(char *buf) { return 1; } -int readAnnotations(FILE *fp) { - char buf[AOF_ANNOTATION_LINE_MAX_LEN]; - while (1) { - epos = ftello(fp); - if (fgets(buf, sizeof(buf), fp) == NULL) { - return 0; - } - if (buf[0] == '#') { - if (to_timestamp && strncmp(buf, "#TS:", 4) == 0) { - time_t ts = strtol(buf+4, NULL, 10); - if (ts <= to_timestamp) continue; - if (epos == 0) { - printf("AOF has nothing before timestamp %ld, " - "aborting...\n", to_timestamp); - fclose(fp); - exit(1); - } - /* Truncate remaining AOF if exceeding 'to_timestamp' */ - if (ftruncate(fileno(fp), epos) == -1) { - printf("Failed to truncate AOF to timestamp %ld\n", - to_timestamp); - exit(1); - } else { - printf("Successfully truncated AOF to timestamp %ld\n", - to_timestamp); - fclose(fp); - exit(0); - } - } - continue; - } else { - if (fseek(fp, -(ftello(fp)-epos), SEEK_CUR) == -1) { - ERROR("Fseek error: %s", strerror(errno)); - return 0; - } - return 1; - } - } - return 1; -} - int readLong(FILE *fp, char prefix, long *target) { char buf[128], *eptr; epos = ftello(fp); @@ -133,9 +110,13 @@ int readString(FILE *fp, char** target) { len += 2; *target = (char*)zmalloc(len); if (!readBytes(fp,*target,len)) { + zfree(*target); + *target = NULL; return 0; } if (!consumeNewline(*target+len-2)) { + zfree(*target); + *target = NULL; return 0; } (*target)[len-2] = '\0'; @@ -146,156 +127,454 @@ int readArgc(FILE *fp, long *target) { return readLong(fp,'*',target); } -off_t process(FILE *fp) { +/* Used to decode a RESP record in the AOF file to obtain the original + * redis command, and also check whether the command is MULTI/EXEC. If the + * command is MULTI, the parameter out_multi will be incremented by one, and + * if the command is EXEC, the parameter out_multi will be decremented + * by one. The parameter out_multi will be used by the upper caller to determine + * whether the AOF file contains unclosed transactions. + **/ +int processRESP(FILE *fp, char *filename, int *out_multi) { long argc; - off_t pos = 0; - int i, multi = 0; char *str; - while(1) { - if (!multi) pos = ftello(fp); - if (!readAnnotations(fp)) break; - if (!readArgc(fp, &argc)) break; - - for (i = 0; i < argc; i++) { - if (!readString(fp,&str)) break; - if (i == 0) { - if (strcasecmp(str, "multi") == 0) { - if (multi++) { - ERROR("Unexpected MULTI"); - break; - } - } else if (strcasecmp(str, "exec") == 0) { - if (--multi) { - ERROR("Unexpected EXEC"); - break; - } + if (!readArgc(fp, &argc)) return 0; + + for (int i = 0; i < argc; i++) { + if (!readString(fp, &str)) return 0; + if (i == 0) { + if (strcasecmp(str, "multi") == 0) { + if ((*out_multi)++) { + ERROR("Unexpected MULTI in AOF %s", filename); + zfree(str); + return 0; + } + } else if (strcasecmp(str, "exec") == 0) { + if (--(*out_multi)) { + ERROR("Unexpected EXEC in AOF %s", filename); + zfree(str); + return 0; } } - zfree(str); - } - - /* Stop if the loop did not finish */ - if (i < argc) { - if (str) zfree(str); - break; } + zfree(str); } - if (feof(fp) && multi && strlen(error) == 0) { - ERROR("Reached EOF before reading EXEC for MULTI"); - } - if (strlen(error) > 0) { - printf("%s\n", error); - } - return pos; + return 1; } -int redis_check_aof_main(int argc, char **argv) { - char *filename; - int fix = 0; +/* Used to parse an annotation in the AOF file, the annotation starts with '#' + * in AOF. Currently AOF only contains timestamp annotations, but this function + * can easily be extended to handle other annotations. + * + * The processing rule of time annotation is that once the timestamp is found to + * be greater than 'to_timestamp', the AOF after the annotation is truncated. + * Note that in Multi Part AOF, this truncation is only allowed when the last_file + * parameter is 1. + **/ +int processAnnotations(FILE *fp, char *filename, int last_file) { + char buf[AOF_ANNOTATION_LINE_MAX_LEN]; - if (argc < 2) { - goto invalid_args; - } else if (argc == 2) { - filename = argv[1]; - } else if (argc == 3) { - if (!strcmp(argv[1],"--fix")) { - filename = argv[2]; - fix = 1; - } else { - goto invalid_args; + epos = ftello(fp); + if (fgets(buf, sizeof(buf), fp) == NULL) { + printf("Failed to read annotations from AOF %s, aborting...\n", filename); + exit(1); + } + + if (to_timestamp && strncmp(buf, "#TS:", 4) == 0) { + char *endptr; + errno = 0; + time_t ts = strtol(buf+4, &endptr, 10); + if (errno != 0 || *endptr != '\r') { + printf("Invalid timestamp annotation\n"); + exit(1); } - } else if (argc == 4) { - if (!strcmp(argv[1], "--truncate-to-timestamp")) { - to_timestamp = strtol(argv[2],NULL,10); - filename = argv[3]; + if (ts <= to_timestamp) return 1; + if (epos == 0) { + printf("AOF %s has nothing before timestamp %ld, " + "aborting...\n", filename, to_timestamp); + exit(1); + } + if (!last_file) { + printf("Failed to truncate AOF %s to timestamp %ld to offset %ld because it is not the last file.\n", + filename, to_timestamp, (long int)epos); + printf("If you insist, please delete all files after this file according to the manifest " + "file and delete the corresponding records in manifest file manually. Then re-run redis-check-aof.\n"); + exit(1); + } + /* Truncate remaining AOF if exceeding 'to_timestamp' */ + if (ftruncate(fileno(fp), epos) == -1) { + printf("Failed to truncate AOF %s to timestamp %ld\n", + filename, to_timestamp); + exit(1); } else { - goto invalid_args; + return 0; } - } else { - goto invalid_args; } + return 1; +} + +/* Used to check the validity of a single AOF file. The AOF file can be: + * 1. Old-style AOF + * 2. Old-style RDB-preamble AOF + * 3. BASE or INCR in Multi Part AOF + * */ +int checkSingleAof(char *aof_filename, char *aof_filepath, int last_file, int fix, int preamble) { + off_t pos = 0, diff; + int multi = 0; + char buf[2]; - FILE *fp = fopen(filename,"r+"); + FILE *fp = fopen(aof_filepath, "r+"); if (fp == NULL) { - printf("Cannot open file: %s\n", filename); + printf("Cannot open file %s: %s, aborting...\n", aof_filepath, strerror(errno)); exit(1); } struct redis_stat sb; if (redis_fstat(fileno(fp),&sb) == -1) { - printf("Cannot stat file: %s\n", filename); + printf("Cannot stat file: %s, aborting...\n", aof_filename); exit(1); } off_t size = sb.st_size; if (size == 0) { - printf("Empty file: %s\n", filename); - exit(1); + return AOF_CHECK_EMPTY; } - /* This AOF file may have an RDB preamble. Check this to start, and if this - * is the case, start processing the RDB part. */ - if (size >= 8) { /* There must be at least room for the RDB header. */ - char sig[5]; - int has_preamble = fread(sig,sizeof(sig),1,fp) == 1 && - memcmp(sig,"REDIS",sizeof(sig)) == 0; - rewind(fp); - if (has_preamble) { - printf("The AOF appears to start with an RDB preamble.\n" - "Checking the RDB preamble to start:\n"); - if (redis_check_rdb_main(argc,argv,fp) == C_ERR) { - printf("RDB preamble of AOF file is not sane, aborting.\n"); - exit(1); - } else { - printf("RDB preamble is OK, proceeding with AOF tail...\n"); + if (preamble) { + char *argv[2] = {NULL, aof_filename}; + if (redis_check_rdb_main(2, argv, fp) == C_ERR) { + printf("RDB preamble of AOF file is not sane, aborting.\n"); + exit(1); + } else { + printf("RDB preamble is OK, proceeding with AOF tail...\n"); + } + } + + while(1) { + if (!multi) pos = ftello(fp); + if (fgets(buf, sizeof(buf), fp) == NULL) { + if (feof(fp)) { + break; + } + printf("Failed to read from AOF %s, aborting...\n", aof_filename); + exit(1); + } + + if (fseek(fp, -1, SEEK_CUR) == -1) { + printf("Failed to fseek in AOF %s: %s", aof_filename, strerror(errno)); + exit(1); + } + + if (buf[0] == '#') { + if (!processAnnotations(fp, aof_filepath, last_file)) { + fclose(fp); + return AOF_CHECK_TIMESTAMP_TRUNCATED; } + } else if (buf[0] == '*'){ + if (!processRESP(fp, aof_filepath, &multi)) break; + } else { + printf("AOF %s format error\n", aof_filename); + break; } } - off_t pos = process(fp); - off_t diff = size-pos; + if (feof(fp) && multi && strlen(error) == 0) { + ERROR("Reached EOF before reading EXEC for MULTI"); + } + + if (strlen(error) > 0) { + printf("%s\n", error); + } + + diff = size-pos; /* In truncate-to-timestamp mode, just exit if there is nothing to truncate. */ if (diff == 0 && to_timestamp) { - printf("Truncate nothing in AOF to timestamp %ld\n", to_timestamp); + printf("Truncate nothing in AOF %s to timestamp %ld\n", aof_filename, to_timestamp); fclose(fp); - exit(0); + return AOF_CHECK_OK; } - printf("AOF analyzed: size=%lld, ok_up_to=%lld, ok_up_to_line=%lld, diff=%lld\n", - (long long) size, (long long) pos, line, (long long) diff); + printf("AOF analyzed: filename=%s, size=%lld, ok_up_to=%lld, ok_up_to_line=%lld, diff=%lld\n", + aof_filename, (long long) size, (long long) pos, line, (long long) diff); if (diff > 0) { if (fix) { + if (!last_file) { + printf("Failed to truncate AOF %s because it is not the last file\n", aof_filename); + exit(1); + } + char buf[2]; - printf("This will shrink the AOF from %lld bytes, with %lld bytes, to %lld bytes\n",(long long)size,(long long)diff,(long long)pos); + printf("This will shrink the AOF %s from %lld bytes, with %lld bytes, to %lld bytes\n", + aof_filename, (long long)size, (long long)diff, (long long)pos); printf("Continue? [y/N]: "); - if (fgets(buf,sizeof(buf),stdin) == NULL || - strncasecmp(buf,"y",1) != 0) { - printf("Aborting...\n"); - exit(1); + if (fgets(buf, sizeof(buf), stdin) == NULL || strncasecmp(buf, "y", 1) != 0) { + printf("Aborting...\n"); + exit(1); } if (ftruncate(fileno(fp), pos) == -1) { - printf("Failed to truncate AOF\n"); + printf("Failed to truncate AOF %s\n", aof_filename); exit(1); } else { - printf("Successfully truncated AOF\n"); + fclose(fp); + return AOF_CHECK_TRUNCATED; } } else { - printf("AOF is not valid. " - "Use the --fix option to try fixing it.\n"); + printf("AOF %s is not valid. Use the --fix option to try fixing it.\n", aof_filename); exit(1); } - } else { - printf("AOF is valid\n"); + } + fclose(fp); + return AOF_CHECK_OK; +} + +/* Used to determine whether the file is a RDB file. These two possibilities: + * 1. The file is an old style RDB-preamble AOF + * 2. The file is a BASE AOF in Multi Part AOF + * */ +int fileIsRDB(char *filepath) { + FILE *fp = fopen(filepath, "r"); + if (fp == NULL) { + printf("Cannot open file %s: %s\n", filepath, strerror(errno)); + exit(1); + } + + struct redis_stat sb; + if (redis_fstat(fileno(fp), &sb) == -1) { + printf("Cannot stat file: %s\n", filepath); + exit(1); + } + + off_t size = sb.st_size; + if (size == 0) { + fclose(fp); + return 0; + } + + if (size >= 8) { /* There must be at least room for the RDB header. */ + char sig[5]; + int rdb_file = fread(sig, sizeof(sig), 1, fp) == 1 && + memcmp(sig, "REDIS", sizeof(sig)) == 0; + if (rdb_file) { + fclose(fp); + return 1; + } + } + + fclose(fp); + return 0; +} + +/* Used to determine whether the file is a manifest file. */ +#define MANIFEST_MAX_LINE 1024 +int fileIsManifest(char *filepath) { + int is_manifest = 0; + FILE *fp = fopen(filepath, "r"); + if (fp == NULL) { + printf("Cannot open file %s: %s\n", filepath, strerror(errno)); + exit(1); + } + + struct redis_stat sb; + if (redis_fstat(fileno(fp), &sb) == -1) { + printf("Cannot stat file: %s\n", filepath); + exit(1); + } + + off_t size = sb.st_size; + if (size == 0) { + fclose(fp); + return 0; + } + + char buf[MANIFEST_MAX_LINE+1]; + while (1) { + if (fgets(buf, MANIFEST_MAX_LINE+1, fp) == NULL) { + if (feof(fp)) { + break; + } else { + printf("Cannot read file: %s\n", filepath); + exit(1); + } + } + + /* Skip comments lines */ + if (buf[0] == '#') { + continue; + } else if (!memcmp(buf, "file", strlen("file"))) { + is_manifest = 1; + } } fclose(fp); + return is_manifest; +} + +/* Get the format of the file to be checked. It can be: + * AOF_RESP: Old-style AOF + * AOF_RDB_PREAMBLE: Old-style RDB-preamble AOF + * AOF_MULTI_PART: manifest in Multi Part AOF + * + * redis-check-aof tool will automatically perform different + * verification logic according to different file formats. + * */ +input_file_type getInputFileType(char *filepath) { + if (fileIsManifest(filepath)) { + return AOF_MULTI_PART; + } else if (fileIsRDB(filepath)) { + return AOF_RDB_PREAMBLE; + } else { + return AOF_RESP; + } +} + +/* Check if Multi Part AOF is valid. It will check the BASE file and INCR files + * at once according to the manifest instructions (this is somewhat similar to + * redis' AOF loading). + * + * When the verification is successful, we can guarantee: + * 1. The manifest file format is valid + * 2. Both BASE AOF and INCR AOFs format are valid + * 3. No BASE or INCR AOFs files are missing + * + * Note that in Multi Part AOF, we only allow truncation for the last AOF file. + * */ +void checkMultiPartAof(char *dirpath, char *manifest_filepath, int fix) { + int total_num = 0, aof_num = 0, last_file; + int ret; + + printf("Start checking Multi Part AOF\n"); + aofManifest *am = aofLoadManifestFromFile(manifest_filepath); + + if (am->base_aof_info) total_num++; + if (am->incr_aof_list) total_num += listLength(am->incr_aof_list); + + if (am->base_aof_info) { + sds aof_filename = am->base_aof_info->file_name; + sds aof_filepath = makePath(dirpath, aof_filename); + last_file = ++aof_num == total_num; + int aof_preable = fileIsRDB(aof_filepath); + + printf("Start to check BASE AOF (%s format).\n", aof_preable ? "RDB":"RESP"); + ret = checkSingleAof(aof_filename, aof_filepath, last_file, fix, aof_preable); + if (ret == AOF_CHECK_OK) { + printf("BASE AOF %s is valid\n", aof_filename); + } else if (ret == AOF_CHECK_EMPTY) { + printf("BASE AOF %s is empty\n", aof_filename); + } else if (ret == AOF_CHECK_TIMESTAMP_TRUNCATED) { + printf("Successfully truncated AOF %s to timestamp %ld\n", + aof_filename, to_timestamp); + } else if (ret == AOF_CHECK_TRUNCATED) { + printf("Successfully truncated AOF %s\n", aof_filename); + } + sdsfree(aof_filepath); + } + + if (listLength(am->incr_aof_list)) { + listNode *ln; + listIter li; + + printf("Start to check INCR files.\n"); + listRewind(am->incr_aof_list, &li); + while ((ln = listNext(&li)) != NULL) { + aofInfo *ai = (aofInfo*)ln->value; + sds aof_filename = (char*)ai->file_name; + sds aof_filepath = makePath(dirpath, aof_filename); + last_file = ++aof_num == total_num; + ret = checkSingleAof(aof_filename, aof_filepath, last_file, fix, 0); + if (ret == AOF_CHECK_OK) { + printf("INCR AOF %s is valid\n", aof_filename); + } else if (ret == AOF_CHECK_EMPTY) { + printf("INCR AOF %s is empty\n", aof_filename); + } else if (ret == AOF_CHECK_TIMESTAMP_TRUNCATED) { + printf("Successfully truncated AOF %s to timestamp %ld\n", + aof_filename, to_timestamp); + } else if (ret == AOF_CHECK_TRUNCATED) { + printf("Successfully truncated AOF %s\n", aof_filename); + } + sdsfree(aof_filepath); + } + } + + aofManifestFree(am); + printf("All AOF files and manifest are valid\n"); +} + +/* Check if old style AOF is valid. Internally, it will identify whether + * the AOF is in RDB-preamble format, and will eventually call `checkSingleAof` + * to do the check. */ +void checkOldStyleAof(char *filepath, int fix, int preamble) { + printf("Start checking Old-Style AOF\n"); + int ret = checkSingleAof(filepath, filepath, 1, fix, preamble); + if (ret == AOF_CHECK_OK) { + printf("AOF %s is valid\n", filepath); + } else if (ret == AOF_CHECK_EMPTY) { + printf("AOF %s is empty\n", filepath); + } else if (ret == AOF_CHECK_TIMESTAMP_TRUNCATED) { + printf("Successfully truncated AOF %s to timestamp %ld\n", + filepath, to_timestamp); + } else if (ret == AOF_CHECK_TRUNCATED) { + printf("Successfully truncated AOF %s\n", filepath); + } +} + +int redis_check_aof_main(int argc, char **argv) { + char *filepath; + char temp_filepath[PATH_MAX + 1]; + char *dirpath; + int fix = 0; + + if (argc < 2) { + goto invalid_args; + } else if (argc == 2) { + filepath = argv[1]; + } else if (argc == 3) { + if (!strcmp(argv[1], "--fix")) { + filepath = argv[2]; + fix = 1; + } else { + goto invalid_args; + } + } else if (argc == 4) { + if (!strcmp(argv[1], "--truncate-to-timestamp")) { + char *endptr; + errno = 0; + to_timestamp = strtol(argv[2], &endptr, 10); + if (errno != 0 || *endptr != '\0') { + printf("Invalid timestamp, aborting...\n"); + exit(1); + } + filepath = argv[3]; + } else { + goto invalid_args; + } + } else { + goto invalid_args; + } + + /* In the glibc implementation dirname may modify their argument. */ + memcpy(temp_filepath, filepath, strlen(filepath) + 1); + dirpath = dirname(temp_filepath); + + /* Select the corresponding verification method according to the input file type. */ + input_file_type type = getInputFileType(filepath); + switch (type) { + case AOF_MULTI_PART: + checkMultiPartAof(dirpath, filepath, fix); + break; + case AOF_RESP: + checkOldStyleAof(filepath, fix, 0); + break; + case AOF_RDB_PREAMBLE: + checkOldStyleAof(filepath, fix, 1); + break; + } + exit(0); invalid_args: - printf("Usage: %s [--fix|--truncate-to-timestamp $timestamp] <file.aof>\n", - argv[0]); + printf("Usage: %s [--fix|--truncate-to-timestamp $timestamp] <file.manifest|file.aof>\n", + argv[0]); exit(1); } |