diff options
author | sueloverso <sue@mongodb.com> | 2017-07-26 02:24:30 -0400 |
---|---|---|
committer | Alex Gorrod <alexander.gorrod@mongodb.com> | 2017-07-26 16:24:30 +1000 |
commit | 540cb969ad733bd2743533c5cf07ee1adbb3a2e4 (patch) | |
tree | 2aad0099d54a78a340305bffc6a1edf877543318 | |
parent | 58a2c6ce5681d8357944466f2d11a9e0d14cdddb (diff) | |
download | mongo-540cb969ad733bd2743533c5cf07ee1adbb3a2e4.tar.gz |
WT-3440 Add a log record when starting a checkpoint. (#3525)
Otherwise recovery could do more work than necessary and create a new checkpoint.
-rw-r--r-- | dist/log.py | 66 | ||||
-rw-r--r-- | dist/log_data.py | 11 | ||||
-rwxr-xr-x | dist/s_longlines | 1 | ||||
-rw-r--r-- | dist/s_string.ok | 1 | ||||
-rwxr-xr-x | dist/s_whitespace | 1 | ||||
-rw-r--r-- | src/include/extern.h | 7 | ||||
-rw-r--r-- | src/include/wiredtiger.in | 4 | ||||
-rw-r--r-- | src/log/log.c | 10 | ||||
-rw-r--r-- | src/log/log_auto.c | 146 | ||||
-rw-r--r-- | src/log/log_sys.c | 36 | ||||
-rw-r--r-- | src/txn/txn_log.c | 52 | ||||
-rw-r--r-- | test/suite/test_compat01.py | 2 | ||||
-rw-r--r-- | test/suite/test_timestamp03.py | 230 |
13 files changed, 480 insertions, 87 deletions
diff --git a/dist/log.py b/dist/log.py index 81dc8bd35d7..2da8e5eae66 100644 --- a/dist/log.py +++ b/dist/log.py @@ -10,10 +10,10 @@ tmp_file = '__tmp' # Map log record types to: # (C type, pack type, printf format, printf arg(s), list of setup functions) field_types = { - 'WT_LSN' : ('WT_LSN *', 'II', '%" PRIu32 "%" PRIu32 "', - 'arg->l.file, arg->l.offset', [ '' ]), - 'string' : ('const char *', 'S', '%s', 'arg', [ '' ]), - 'item' : ('WT_ITEM *', 'u', '%s', 'escaped', + 'WT_LSN' : ('WT_LSN *', 'II', '[%" PRIu32 ", %" PRIu32 "]', + 'arg.l.file, arg.l.offset', [ '' ]), + 'string' : ('const char *', 'S', '\\"%s\\"', 'arg', [ '' ]), + 'item' : ('WT_ITEM *', 'u', '\\"%s\\"', 'escaped', [ 'WT_ERR(__logrec_make_json_str(session, &escaped, &arg));', 'WT_ERR(__logrec_make_hex_str(session, &escaped, &arg));']), 'recno' : ('uint64_t', 'r', '%" PRIu64 "', 'arg', [ '' ]), @@ -27,7 +27,7 @@ def cintype(f): def couttype(f): type = cintype(f) # We already have a pointer to a WT_ITEM - if f[0] == 'item': + if f[0] == 'item' or f[0] == 'WT_LSN': return type if type[-1] != '*': type += ' ' @@ -35,8 +35,8 @@ def couttype(f): def clocaltype(f): type = cintype(f) - # Allocate a WT_ITEM struct on the stack - if f[0] == 'item': + # Allocate WT_ITEM and WT_LSN structs on the stack + if f[0] in ('item', 'WT_LSN'): return type[:-2] return type @@ -62,10 +62,20 @@ def rec_pack_fmt(r): def printf_fmt(f): return field_types[f[0]][2] +def pack_arg(f): + if f[0] == 'WT_LSN': + return '%s->l.file, %s->l.offset' % (f[1], f[1]) + return f[1] + def printf_arg(f): arg = field_types[f[0]][3].replace('arg', f[1]) return ' ' + arg +def unpack_arg(f): + if f[0] == 'WT_LSN': + return '&%sp->l.file, &%sp->l.offset' % (f[1], f[1]) + return f[1] + 'p' + def printf_setup(f, i, nl_indent): stmt = field_types[f[0]][4][i].replace('arg', f[1]) return '' if stmt == '' else stmt + nl_indent @@ -94,7 +104,7 @@ def printf_line(f, optype, i, ishex): body = '%s%s(__wt_fprintf(session, WT_STDOUT(session),' % ( printf_setup(f, ishex, nl_indent), 'WT_ERR' if has_escape(optype.fields) else 'WT_RET') + \ - '%s "%s \\"%s\\": \\"%s\\"%s",%s));' % ( + '%s "%s \\"%s\\": %s%s",%s));' % ( nl_indent, precomma, name, printf_fmt(f), postcomma, printf_arg(f)) return ifbegin + body + ifend @@ -201,13 +211,10 @@ __logrec_make_hex_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item) # Emit code to read, write and print log operations (within a log record) for optype in log_data.optypes: - if not optype.fields: - continue - tfile.write(''' int __wt_logop_%(name)s_pack( - WT_SESSION_IMPL *session, WT_ITEM *logrec, + WT_SESSION_IMPL *session, WT_ITEM *logrec%(comma)s %(arg_decls)s) { \tconst char *fmt = WT_UNCHECKED_STRING(%(fmt)s); @@ -216,14 +223,14 @@ __wt_logop_%(name)s_pack( \toptype = %(macro)s; \tWT_RET(__wt_struct_size(session, &size, fmt, -\t optype, 0%(arg_names)s)); +\t optype, 0%(pack_args)s)); \t__wt_struct_size_adjust(session, &size); \tWT_RET(__wt_buf_extend(session, logrec, logrec->size + size)); \trecsize = (uint32_t)size; \tWT_RET(__wt_struct_pack(session, \t (uint8_t *)logrec->data + logrec->size, size, fmt, -\t optype, recsize%(arg_names)s)); +\t optype, recsize%(pack_args)s)); \tlogrec->size += (uint32_t)size; \treturn (0); @@ -231,17 +238,18 @@ __wt_logop_%(name)s_pack( ''' % { 'name' : optype.name, 'macro' : optype.macro_name(), + 'comma' : ',' if optype.fields else '', 'arg_decls' : ', '.join( '%s%s%s' % (cintype(f), '' if cintype(f)[-1] == '*' else ' ', f[1]) for f in optype.fields), - 'arg_names' : ''.join(', %s' % f[1] for f in optype.fields), - 'fmt' : op_pack_fmt(optype) + 'pack_args' : ''.join(', %s' % pack_arg(f) for f in optype.fields), + 'fmt' : op_pack_fmt(optype), }) tfile.write(''' int __wt_logop_%(name)s_unpack( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end%(comma)s %(arg_decls)s) { \tWT_DECL_RET; @@ -249,7 +257,7 @@ __wt_logop_%(name)s_unpack( \tuint32_t optype, size; \tif ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt, -\t &optype, &size%(arg_names)s)) != 0) +\t &optype, &size%(unpack_args)s)) != 0) \t\tWT_RET_MSG(session, ret, "logop_%(name)s: unpack failure"); \tWT_ASSERT(session, optype == %(macro)s); @@ -259,42 +267,43 @@ __wt_logop_%(name)s_unpack( ''' % { 'name' : optype.name, 'macro' : optype.macro_name(), + 'comma' : ',' if optype.fields else '', 'arg_decls' : ', '.join( '%s%sp' % (couttype(f), f[1]) for f in optype.fields), - 'arg_names' : ''.join(', %sp' % f[1] for f in optype.fields), + 'unpack_args' : ''.join(', %s' % unpack_arg(f) for f in optype.fields), 'fmt' : op_pack_fmt(optype) }) - last_field = optype.fields[-1] tfile.write(''' int __wt_logop_%(name)s_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) -{ -%(arg_ret)s\t%(arg_decls)s +{%(arg_ret)s%(arg_decls)s \t%(arg_unused)s%(arg_init)sWT_RET(__wt_logop_%(name)s_unpack( \t session, pp, end%(arg_addrs)s)); \tWT_RET(__wt_fprintf(session, WT_STDOUT(session), \t " \\"optype\\": \\"%(name)s\\",\\n")); -\t%(print_args)s +%(print_args)s %(arg_fini)s } ''' % { 'name' : optype.name, - 'arg_ret' : ('\tWT_DECL_RET;\n' if has_escape(optype.fields) else ''), - 'arg_decls' : ('\n\t'.join('%s%s%s;' % + 'arg_ret' : ('\n\tWT_DECL_RET;' if has_escape(optype.fields) else ''), + 'arg_decls' : (('\n\t' + '\n\t'.join('%s%s%s;' % (clocaltype(f), '' if clocaltype(f)[-1] == '*' else ' ', f[1]) - for f in optype.fields)) + escape_decl(optype.fields), + for f in optype.fields)) + escape_decl(optype.fields) + if optype.fields else ''), 'arg_unused' : ('' if has_escape(optype.fields) else 'WT_UNUSED(flags);\n\t'), 'arg_init' : ('escaped = NULL;\n\t' if has_escape(optype.fields) else ''), 'arg_fini' : ('\nerr:\t__wt_free(session, escaped);\n\treturn (ret);' if has_escape(optype.fields) else '\treturn (0);'), 'arg_addrs' : ''.join(', &%s' % f[1] for f in optype.fields), - 'print_args' : '\n\t'.join(printf_line(f, optype, i, s) + 'print_args' : ('\t' + '\n\t'.join(printf_line(f, optype, i, s) for i,f in enumerate(optype.fields) for s in range(0, n_setup(f))) + if optype.fields else ''), }) # Emit the printlog entry point @@ -312,9 +321,6 @@ __wt_txn_op_printlog(WT_SESSION_IMPL *session, \tswitch (optype) {''') for optype in log_data.optypes: - if not optype.fields: - continue - tfile.write(''' \tcase %(macro)s: \t\tWT_RET(%(print_func)s(session, pp, end, flags)); diff --git a/dist/log_data.py b/dist/log_data.py index 1782d2aadf0..487657f10ce 100644 --- a/dist/log_data.py +++ b/dist/log_data.py @@ -41,9 +41,8 @@ rectypes = [ # Debugging message in the log LogRecordType('message', 'message', [('string', 'message')]), - # System record - LogRecordType('system', 'system', [ - ('WT_LSN','prev_lsn'), ('item', 'unused')]), + # System (internal) log record + LogRecordType('system', 'system', []), ] class LogOperationType: @@ -61,6 +60,7 @@ class LogOperationType: # never change after they're written in a log file. # optypes = [ +# commit operations LogOperationType('col_put', 'column put', [('uint32', 'fileid'), ('recno', 'recno'), ('item', 'value')]), LogOperationType('col_remove', 'column remove', @@ -74,4 +74,9 @@ optypes = [ LogOperationType('row_truncate', 'row truncate', [('uint32', 'fileid'), ('item', 'start'), ('item', 'stop'), ('uint32', 'mode')]), + +# system operations + LogOperationType('checkpoint_start', 'checkpoint start', []), + LogOperationType('prev_lsn', 'previous LSN', + [('WT_LSN', 'prev_lsn')]), ] diff --git a/dist/s_longlines b/dist/s_longlines index bdb9811b4bd..736ea36e6d4 100755 --- a/dist/s_longlines +++ b/dist/s_longlines @@ -14,6 +14,7 @@ l=`(cd .. && -e '/include\/extern\.h/d' \ -e '/include\/extern_posix\.h/d' \ -e '/include\/extern_win\.h/d' \ + -e '/log\/log_auto\.c/d' \ -e '/support\/stat\.c/d'` for f in $l ; do diff --git a/dist/s_string.ok b/dist/s_string.ok index 4a4b60a9d79..d9e617ec42e 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -994,6 +994,7 @@ oo opcode opendir openfile +oplist oplog optimizations optype diff --git a/dist/s_whitespace b/dist/s_whitespace index 874074dfb50..f4c96cdc91a 100755 --- a/dist/s_whitespace +++ b/dist/s_whitespace @@ -28,6 +28,7 @@ find bench dist examples ext src test \ -e '/checksum\/power8/d' \ -e '/3rdparty/d' \ -e '/docs\/tools/d' \ + -e '/log\/log_auto/d' \ | while read f ; do whitespace $f done diff --git a/src/include/extern.h b/src/include/extern.h index 48886ee9dde..4d945d74cb2 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -362,6 +362,7 @@ extern int __wt_verbose_dump_cache(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_AT extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_curstat_cache_walk(WT_SESSION_IMPL *session); +extern int __wt_log_printf(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn); extern int __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn); @@ -406,6 +407,12 @@ extern int __wt_logop_row_remove_print(WT_SESSION_IMPL *session, const uint8_t * extern int __wt_logop_row_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *start, WT_ITEM *stop, uint32_t mode) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_logop_row_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *startp, WT_ITEM *stopp, uint32_t *modep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_checkpoint_start_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_checkpoint_start_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_checkpoint_start_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_prev_lsn_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, WT_LSN *prev_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_prev_lsn_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_LSN *prev_lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_prev_lsn_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot); extern int __wt_log_slot_switch(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index d7a28bc7380..93d9a3dc7d1 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -4503,6 +4503,10 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_LOGOP_ROW_REMOVE 5 /*! row truncate */ #define WT_LOGOP_ROW_TRUNCATE 6 +/*! checkpoint start */ +#define WT_LOGOP_CHECKPOINT_START 7 +/*! previous LSN */ +#define WT_LOGOP_PREV_LSN 8 /*! @} */ /******************************************* diff --git a/src/log/log.c b/src/log/log.c index 2a912e6568f..1460687d5af 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -20,11 +20,11 @@ static int __log_write_internal( #define WT_LOG_OPEN_CREATE_OK 0x01 /* - * __log_printf_internal -- - * Internal call to write a log message. + * __wt_log_printf -- + * Write a text message to the log. */ -static int -__log_printf_internal(WT_SESSION_IMPL *session, const char *fmt, ...) +int +__wt_log_printf(WT_SESSION_IMPL *session, const char *fmt, ...) { WT_DECL_RET; va_list ap; @@ -1259,7 +1259,7 @@ __wt_log_set_version(WT_SESSION_IMPL *session, uint16_t version, * an archive correctly removes all earlier logs. * Write an internal printf record. */ - WT_ERR(__log_printf_internal(session, + WT_ERR(__wt_log_printf(session, "COMPATIBILITY: Version now %" PRIu16, log->log_version)); if (lognump != NULL) *lognump = log->alloc_lsn.l.file; diff --git a/src/log/log_auto.c b/src/log/log_auto.c index c2b38184405..76df4ebd98f 100644 --- a/src/log/log_auto.c +++ b/src/log/log_auto.c @@ -149,9 +149,9 @@ __wt_logop_col_put_print(WT_SESSION_IMPL *session, WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"optype\": \"col_put\",\n")); WT_ERR(__wt_fprintf(session, WT_STDOUT(session), - " \"fileid\": \"%" PRIu32 "\",\n", fileid)); + " \"fileid\": %" PRIu32 ",\n", fileid)); WT_ERR(__wt_fprintf(session, WT_STDOUT(session), - " \"recno\": \"%" PRIu64 "\",\n", recno)); + " \"recno\": %" PRIu64 ",\n", recno)); WT_ERR(__logrec_make_json_str(session, &escaped, &value)); WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"value\": \"%s\"", escaped)); @@ -221,9 +221,9 @@ __wt_logop_col_remove_print(WT_SESSION_IMPL *session, WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"optype\": \"col_remove\",\n")); WT_RET(__wt_fprintf(session, WT_STDOUT(session), - " \"fileid\": \"%" PRIu32 "\",\n", fileid)); + " \"fileid\": %" PRIu32 ",\n", fileid)); WT_RET(__wt_fprintf(session, WT_STDOUT(session), - " \"recno\": \"%" PRIu64 "\"", recno)); + " \"recno\": %" PRIu64 "", recno)); return (0); } @@ -284,11 +284,11 @@ __wt_logop_col_truncate_print(WT_SESSION_IMPL *session, WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"optype\": \"col_truncate\",\n")); WT_RET(__wt_fprintf(session, WT_STDOUT(session), - " \"fileid\": \"%" PRIu32 "\",\n", fileid)); + " \"fileid\": %" PRIu32 ",\n", fileid)); WT_RET(__wt_fprintf(session, WT_STDOUT(session), - " \"start\": \"%" PRIu64 "\",\n", start)); + " \"start\": %" PRIu64 ",\n", start)); WT_RET(__wt_fprintf(session, WT_STDOUT(session), - " \"stop\": \"%" PRIu64 "\"", stop)); + " \"stop\": %" PRIu64 "", stop)); return (0); } @@ -351,7 +351,7 @@ __wt_logop_row_put_print(WT_SESSION_IMPL *session, WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"optype\": \"row_put\",\n")); WT_ERR(__wt_fprintf(session, WT_STDOUT(session), - " \"fileid\": \"%" PRIu32 "\",\n", fileid)); + " \"fileid\": %" PRIu32 ",\n", fileid)); WT_ERR(__logrec_make_json_str(session, &escaped, &key)); WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"key\": \"%s\",\n", escaped)); @@ -431,7 +431,7 @@ __wt_logop_row_remove_print(WT_SESSION_IMPL *session, WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"optype\": \"row_remove\",\n")); WT_ERR(__wt_fprintf(session, WT_STDOUT(session), - " \"fileid\": \"%" PRIu32 "\",\n", fileid)); + " \"fileid\": %" PRIu32 ",\n", fileid)); WT_ERR(__logrec_make_json_str(session, &escaped, &key)); WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"key\": \"%s\"", escaped)); @@ -505,7 +505,7 @@ __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"optype\": \"row_truncate\",\n")); WT_ERR(__wt_fprintf(session, WT_STDOUT(session), - " \"fileid\": \"%" PRIu32 "\",\n", fileid)); + " \"fileid\": %" PRIu32 ",\n", fileid)); WT_ERR(__logrec_make_json_str(session, &escaped, &start)); WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"start\": \"%s\",\n", escaped)); @@ -523,13 +523,129 @@ __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, " \"stop-hex\": \"%s\",\n", escaped)); } WT_ERR(__wt_fprintf(session, WT_STDOUT(session), - " \"mode\": \"%" PRIu32 "\"", mode)); + " \"mode\": %" PRIu32 "", mode)); err: __wt_free(session, escaped); return (ret); } int +__wt_logop_checkpoint_start_pack( + WT_SESSION_IMPL *session, WT_ITEM *logrec + ) +{ + const char *fmt = WT_UNCHECKED_STRING(II); + size_t size; + uint32_t optype, recsize; + + optype = WT_LOGOP_CHECKPOINT_START; + WT_RET(__wt_struct_size(session, &size, fmt, + optype, 0)); + + __wt_struct_size_adjust(session, &size); + WT_RET(__wt_buf_extend(session, logrec, logrec->size + size)); + recsize = (uint32_t)size; + WT_RET(__wt_struct_pack(session, + (uint8_t *)logrec->data + logrec->size, size, fmt, + optype, recsize)); + + logrec->size += (uint32_t)size; + return (0); +} + +int +__wt_logop_checkpoint_start_unpack( + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end + ) +{ + WT_DECL_RET; + const char *fmt = WT_UNCHECKED_STRING(II); + uint32_t optype, size; + + if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt, + &optype, &size)) != 0) + WT_RET_MSG(session, ret, "logop_checkpoint_start: unpack failure"); + WT_ASSERT(session, optype == WT_LOGOP_CHECKPOINT_START); + + *pp += size; + return (0); +} + +int +__wt_logop_checkpoint_start_print(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) +{ + + WT_UNUSED(flags); + WT_RET(__wt_logop_checkpoint_start_unpack( + session, pp, end)); + + WT_RET(__wt_fprintf(session, WT_STDOUT(session), + " \"optype\": \"checkpoint_start\",\n")); + + return (0); +} + +int +__wt_logop_prev_lsn_pack( + WT_SESSION_IMPL *session, WT_ITEM *logrec, + WT_LSN *prev_lsn) +{ + const char *fmt = WT_UNCHECKED_STRING(IIII); + size_t size; + uint32_t optype, recsize; + + optype = WT_LOGOP_PREV_LSN; + WT_RET(__wt_struct_size(session, &size, fmt, + optype, 0, prev_lsn->l.file, prev_lsn->l.offset)); + + __wt_struct_size_adjust(session, &size); + WT_RET(__wt_buf_extend(session, logrec, logrec->size + size)); + recsize = (uint32_t)size; + WT_RET(__wt_struct_pack(session, + (uint8_t *)logrec->data + logrec->size, size, fmt, + optype, recsize, prev_lsn->l.file, prev_lsn->l.offset)); + + logrec->size += (uint32_t)size; + return (0); +} + +int +__wt_logop_prev_lsn_unpack( + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + WT_LSN *prev_lsnp) +{ + WT_DECL_RET; + const char *fmt = WT_UNCHECKED_STRING(IIII); + uint32_t optype, size; + + if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt, + &optype, &size, &prev_lsnp->l.file, &prev_lsnp->l.offset)) != 0) + WT_RET_MSG(session, ret, "logop_prev_lsn: unpack failure"); + WT_ASSERT(session, optype == WT_LOGOP_PREV_LSN); + + *pp += size; + return (0); +} + +int +__wt_logop_prev_lsn_print(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) +{ + WT_LSN prev_lsn; + + WT_UNUSED(flags); + WT_RET(__wt_logop_prev_lsn_unpack( + session, pp, end, &prev_lsn)); + + WT_RET(__wt_fprintf(session, WT_STDOUT(session), + " \"optype\": \"prev_lsn\",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), + " \"prev_lsn\": [%" PRIu32 ", %" PRIu32 "]", prev_lsn.l.file, prev_lsn.l.offset)); + return (0); +} + +int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) { @@ -564,6 +680,14 @@ __wt_txn_op_printlog(WT_SESSION_IMPL *session, WT_RET(__wt_logop_row_truncate_print(session, pp, end, flags)); break; + case WT_LOGOP_CHECKPOINT_START: + WT_RET(__wt_logop_checkpoint_start_print(session, pp, end, flags)); + break; + + case WT_LOGOP_PREV_LSN: + WT_RET(__wt_logop_prev_lsn_print(session, pp, end, flags)); + break; + WT_ILLEGAL_VALUE(session); } diff --git a/src/log/log_sys.c b/src/log/log_sys.c index 0f3cfdbc14b..4eb2a8e23d2 100644 --- a/src/log/log_sys.c +++ b/src/log/log_sys.c @@ -18,31 +18,28 @@ __wt_log_system_record( { WT_DECL_ITEM(logrec_buf); WT_DECL_RET; - WT_ITEM *dummy, empty; WT_LOG *log; WT_LOG_RECORD *logrec; WT_LOGSLOT tmp; WT_MYSLOT myslot; - const char *fmt = WT_UNCHECKED_STRING(IIIU); + const char *fmt = WT_UNCHECKED_STRING(I); uint32_t rectype = WT_LOGREC_SYSTEM; size_t recsize; log = S2C(session)->log; WT_RET(__wt_logrec_alloc(session, log->allocsize, &logrec_buf)); memset((uint8_t *)logrec_buf->mem, 0, log->allocsize); - WT_CLEAR(empty); - dummy = ∅ - /* - * There is currently an unused portion of the system record for - * future use. Send in a NULL entry. - */ - WT_ERR(__wt_struct_size(session, &recsize, fmt, rectype, - lsn->l.file, lsn->l.offset, dummy)); - WT_ASSERT(session, recsize <= log->allocsize); + + WT_ERR(__wt_struct_size(session, &recsize, fmt, rectype)); WT_ERR(__wt_struct_pack(session, (uint8_t *)logrec_buf->data + logrec_buf->size, recsize, fmt, - rectype, lsn->l.file, lsn->l.offset, dummy)); + rectype)); + logrec_buf->size += recsize; + WT_ERR(__wt_logop_prev_lsn_pack(session, logrec_buf, lsn)); + WT_ASSERT(session, logrec_buf->size <= log->allocsize); + logrec = (WT_LOG_RECORD *)logrec_buf->mem; + /* * We know system records are this size. And we have to adjust * the size now because we're not going through the normal log @@ -50,9 +47,8 @@ __wt_log_system_record( * earlier. */ logrec_buf->size = logrec->len = log->allocsize; - /* - * We do not compress nor encrypt this record. - */ + + /* We do not compress nor encrypt this record. */ logrec->checksum = 0; logrec->flags = 0; __wt_log_record_byteswap(logrec); @@ -82,16 +78,10 @@ __wt_log_recover_system(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_LSN *lsnp) { WT_DECL_RET; - WT_ITEM unused; - uint32_t prev_file, prev_offset; - const char *fmt = WT_UNCHECKED_STRING(IIU); - if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt, - &prev_file, &prev_offset, &unused)) != 0) + if ((ret = __wt_logop_prev_lsn_unpack(session, pp, end, lsnp)) != 0) WT_RET_MSG(session, ret, "log_recover_prevlsn: unpack failure"); - if (lsnp != NULL) - WT_SET_LSN(lsnp, prev_file, prev_offset); - *pp = end; + return (0); } diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c index abe4c085e50..d2fc42804a4 100644 --- a/src/txn/txn_log.c +++ b/src/txn/txn_log.c @@ -109,11 +109,11 @@ __txn_op_log(WT_SESSION_IMPL *session, } /* - * __txn_commit_printlog -- - * Print a commit log record. + * __txn_oplist_printlog -- + * Print a list of operations from a log record. */ static int -__txn_commit_printlog(WT_SESSION_IMPL *session, +__txn_oplist_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) { bool firstrecord; @@ -344,8 +344,8 @@ __wt_txn_checkpoint_log( WT_TXN *txn; uint8_t *end, *p; size_t recsize; - uint32_t i, rectype = WT_LOGREC_CHECKPOINT; - const char *fmt = WT_UNCHECKED_STRING(IIIIu); + uint32_t i, rectype; + const char *fmt; conn = S2C(session); txn = &session->txn; @@ -367,7 +367,31 @@ __wt_txn_checkpoint_log( switch (flags) { case WT_TXN_LOG_CKPT_PREPARE: txn->full_ckpt = true; - WT_ERR(__wt_log_flush_lsn(session, ckpt_lsn, true)); + + if (conn->compat_major >= WT_LOG_V2) { + /* + * Write the system log record containing a checkpoint + * start operation. + */ + rectype = WT_LOGREC_SYSTEM; + fmt = WT_UNCHECKED_STRING(I); + WT_ERR(__wt_struct_size( + session, &recsize, fmt, rectype)); + WT_ERR(__wt_logrec_alloc(session, recsize, &logrec)); + + WT_ERR(__wt_struct_pack(session, + (uint8_t *)logrec->data + logrec->size, recsize, + fmt, rectype)); + logrec->size += (uint32_t)recsize; + WT_ERR(__wt_logop_checkpoint_start_pack( + session, logrec)); + WT_ERR(__wt_log_write(session, logrec, ckpt_lsn, 0)); + } else { + WT_ERR(__wt_log_printf(session, + "CHECKPOINT: Starting record")); + WT_ERR(__wt_log_flush_lsn(session, ckpt_lsn, true)); + } + /* * We need to make sure that the log records in the checkpoint * LSN are on disk. In particular to make sure that the @@ -401,14 +425,16 @@ __wt_txn_checkpoint_log( ckpt_snapshot = txn->ckpt_snapshot; /* Write the checkpoint log record. */ - WT_ERR(__wt_struct_size(session, &recsize, fmt, - rectype, ckpt_lsn->l.file, ckpt_lsn->l.offset, + rectype = WT_LOGREC_CHECKPOINT; + fmt = WT_UNCHECKED_STRING(IIIIu); + WT_ERR(__wt_struct_size(session, &recsize, + fmt, rectype, ckpt_lsn->l.file, ckpt_lsn->l.offset, txn->ckpt_nsnapshot, ckpt_snapshot)); WT_ERR(__wt_logrec_alloc(session, recsize, &logrec)); WT_ERR(__wt_struct_pack(session, - (uint8_t *)logrec->data + logrec->size, recsize, fmt, - rectype, ckpt_lsn->l.file, ckpt_lsn->l.offset, + (uint8_t *)logrec->data + logrec->size, recsize, + fmt, rectype, ckpt_lsn->l.file, ckpt_lsn->l.offset, txn->ckpt_nsnapshot, ckpt_snapshot)); logrec->size += (uint32_t)recsize; WT_ERR(__wt_log_write(session, logrec, lsnp, @@ -568,7 +594,7 @@ __txn_printlog(WT_SESSION_IMPL *session, " \"type\" : \"commit\",\n")); WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"txnid\" : %" PRIu64 ",\n", txnid)); - WT_RET(__txn_commit_printlog(session, &p, end, args->flags)); + WT_RET(__txn_oplist_printlog(session, &p, end, args->flags)); break; case WT_LOGREC_FILE_SYNC: @@ -596,9 +622,7 @@ __txn_printlog(WT_SESSION_IMPL *session, WT_UNCHECKED_STRING(II), &lsnfile, &lsnoffset)); WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"type\" : \"system\",\n")); - WT_RET(__wt_fprintf(session, WT_STDOUT(session), - " \"prev_lsn\" : [%" PRIu32 ",%" PRIu32 "]\n", - lsnfile, lsnoffset)); + WT_RET(__txn_oplist_printlog(session, &p, end, args->flags)); break; } diff --git a/test/suite/test_compat01.py b/test/suite/test_compat01.py index 460d38d5e08..1c9f07c02f2 100644 --- a/test/suite/test_compat01.py +++ b/test/suite/test_compat01.py @@ -102,7 +102,7 @@ class test_compat01(wttest.WiredTigerTestCase, suite_subprocess): contains = 0 with open('printlog.out') as logfile: for line in logfile: - if 'prev_lsn' in line: + if 'optype' in line and 'prev_lsn' in line: contains += 1 self.assertEqual(prev_lsn_count, contains) diff --git a/test/suite/test_timestamp03.py b/test/suite/test_timestamp03.py new file mode 100644 index 00000000000..5b42896fdcd --- /dev/null +++ b/test/suite/test_timestamp03.py @@ -0,0 +1,230 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# test_timestamp03.py +# Timestamps: checkpoints +# + +from helper import copy_wiredtiger_home +import random +from suite_subprocess import suite_subprocess +import wiredtiger, wttest +from wtscenario import make_scenarios + +def timestamp_str(t): + return '%x' % t + +def timestamp_ret_str(t): + s = timestamp_str(t) + if len(s) % 2 == 1: + s = '0' + s + return s + +class test_timestamp03(wttest.WiredTigerTestCase, suite_subprocess): + tablename = 'ts03_ts_nologged' + tablename2 = 'ts03_nots_logged' + tablename3 = 'ts03_ts_logged' + + types = [ + ('file', dict(uri='file:', use_cg=False, use_index=False)), + ('lsm', dict(uri='lsm:', use_cg=False, use_index=False)), + ('table-cg', dict(uri='table:', use_cg=True, use_index=False)), + ('table-index', dict(uri='table:', use_cg=False, use_index=True)), + ('table-simple', dict(uri='table:', use_cg=False, use_index=False)), + ] + + conncfg = [ + ('nolog', dict(conncfg='create')), + ('V1', dict(conncfg='create,log=(enabled),compatibility=(release="2.9")')), + ('V2', dict(conncfg='create,log=(enabled)')), + ] + + ckpt = [ + ('read_ts', dict(ckptcfg='read_timestamp', val='none')), + ] + + scenarios = make_scenarios(types, ckpt, conncfg) + + # Binary values. + value = u'\u0001\u0002abcd\u0003\u0004' + value2 = u'\u0001\u0002dcba\u0003\u0004' + + def conn_config(self): + return self.conncfg + + # Check that a cursor (optionally started in a new transaction), sees the + # expected values. + def check(self, session, txn_config, expected): + if txn_config: + session.begin_transaction(txn_config) + c = session.open_cursor(self.uri + self.tablename, None) + actual = dict((k, v) for k, v in c if v != 0) + self.assertEqual(actual, expected) + # Search for the expected items as well as iterating + for k, v in expected.iteritems(): + self.assertEqual(c[k], v, "for key " + str(k)) + c.close() + if txn_config: + session.commit_transaction() + + # Check that a cursor sees the expected values after a checkpoint. + def ckpt_backup(self, valcnt, valcnt2, valcnt3): + newdir = "BACKUP" + + # Take a checkpoint. Make a copy of the database. Open the + # copy and verify whether or not the expected data is in there. + self.pr("CKPT: " + self.ckptcfg) + ckptcfg = self.ckptcfg + if ckptcfg == 'read_timestamp': + ckptcfg = self.ckptcfg + '=' + self.oldts + # print "CKPT: " + ckptcfg + self.session.checkpoint(ckptcfg) + copy_wiredtiger_home('.', newdir, True) + + conn = self.setUpConnectionOpen(newdir) + session = self.setUpSessionOpen(conn) + c = session.open_cursor(self.uri + self.tablename, None) + c2 = session.open_cursor(self.uri + self.tablename2, None) + c3 = session.open_cursor(self.uri + self.tablename3, None) + # Count how many times the second value is present + count = 0 + for k, v in c: + if self.value2 in str(v): + # print "value2 found in key " + str(k) + count += 1 + # else: + # print "Non-value2: key " + str(k) + c.close() + # Count how many times the second value is present in the + # non-timestamp table. + count2 = 0 + for k, v in c2: + if self.value2 in str(v): + # print "value2 found in key " + str(k) + count2 += 1 + # else: + # print "Non-value2: key " + str(k) + c2.close() + # Count how many times the second value is present in the + # logged timestamp table. + count3 = 0 + for k, v in c3: + if self.value2 in str(v): + # print "value2 found in key " + str(k) + count3 += 1 + # else: + # print "Non-value2: key " + str(k) + c3.close() + conn.close() + # print "CHECK BACKUP: Count " + str(count) + " Count2 " + str(count2) + " Count3 " + str(count3) + # print "CHECK BACKUP: Expect value2 count " + str(valcnt) + # print "CHECK BACKUP: 2nd table Expect value2 count " + str(valcnt2) + # print "CHECK BACKUP: 3rd table Expect value2 count " + str(valcnt3) + # print "CHECK BACKUP: config " + str(self.ckptcfg) + self.assertEqual(count, valcnt) + self.assertEqual(count2, valcnt2) + self.assertEqual(count3, valcnt3) + + def test_timestamp03(self): + if not wiredtiger.timestamp_build(): + self.skipTest('requires a timestamp build') + + uri = self.uri + self.tablename + uri2 = self.uri + self.tablename2 + uri3 = self.uri + self.tablename3 + # + # Open three tables: + # 1. Table is not logged and uses timestamps. + # 2. Table is logged and does not use timestamps. + # 3. Table is logged and uses timestamps. + # + self.session.create(uri, 'key_format=i,value_format=S,log=(enabled=false)') + c = self.session.open_cursor(uri) + self.session.create(uri2, 'key_format=i,value_format=S') + c2 = self.session.open_cursor(uri2) + self.session.create(uri3, 'key_format=i,value_format=S') + c3 = self.session.open_cursor(uri3) + + # Insert keys 1..100 each with timestamp=key, in some order + nkeys = 100 + orig_keys = range(1, nkeys+1) + keys = orig_keys[:] + random.shuffle(keys) + + for k in keys: + c2[k] = self.value + self.session.begin_transaction() + c[k] = self.value + c3[k] = self.value + self.session.commit_transaction('commit_timestamp=' + timestamp_str(k)) + + # Now check that we see the expected state when reading at each + # timestamp + for i, t in enumerate(orig_keys): + self.check(self.session, 'read_timestamp=' + timestamp_str(t), + dict((k, self.value) for k in orig_keys[:i+1])) + + # Bump the oldest timestamp, we're not going back... + self.assertEqual(self.conn.query_timestamp(), timestamp_ret_str(100)) + self.oldts = timestamp_str(100) + self.conn.set_timestamp('oldest_timestamp=' + self.oldts) + # print "Oldest " + self.oldts + + # Update them and retry. + random.shuffle(keys) + count = 0 + for k in keys: + # Make sure a timestamp cursor is the last one to update. This + # tests the scenario for a bug we found where recovery replayed + # the last record written into the log. + # + # print "Key " + str(k) + " to value2" + c2[k] = self.value2 + self.session.begin_transaction() + c[k] = self.value2 + c3[k] = self.value2 + ts = timestamp_str(k + 101) + self.session.commit_transaction('commit_timestamp=' + ts) + # print "Commit key " + str(k) + " ts " + ts + count += 1 + # print "Updated " + str(count) + " keys to value2" + + # Take a checkpoint using the given configuration. Then verify + # whether value2 appears in a copy of that data or not. + if self.val == 'all': + valcnt = nkeys + else: + valcnt = 0 + # Table 2 should always see all the keys + # Table 3 should see whatever table 1 sees. + valcnt2 = nkeys + valcnt3 = valcnt + self.ckpt_backup(valcnt, valcnt2, valcnt3) + +if __name__ == '__main__': + wttest.run() |