summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorsueloverso <sue@mongodb.com>2017-07-26 02:24:30 -0400
committerAlex Gorrod <alexander.gorrod@mongodb.com>2017-07-26 16:24:30 +1000
commit540cb969ad733bd2743533c5cf07ee1adbb3a2e4 (patch)
tree2aad0099d54a78a340305bffc6a1edf877543318
parent58a2c6ce5681d8357944466f2d11a9e0d14cdddb (diff)
downloadmongo-540cb969ad733bd2743533c5cf07ee1adbb3a2e4.tar.gz
WT-3440 Add a log record when starting a checkpoint. (#3525)
Otherwise recovery could do more work than necessary and create a new checkpoint.
-rw-r--r--dist/log.py66
-rw-r--r--dist/log_data.py11
-rwxr-xr-xdist/s_longlines1
-rw-r--r--dist/s_string.ok1
-rwxr-xr-xdist/s_whitespace1
-rw-r--r--src/include/extern.h7
-rw-r--r--src/include/wiredtiger.in4
-rw-r--r--src/log/log.c10
-rw-r--r--src/log/log_auto.c146
-rw-r--r--src/log/log_sys.c36
-rw-r--r--src/txn/txn_log.c52
-rw-r--r--test/suite/test_compat01.py2
-rw-r--r--test/suite/test_timestamp03.py230
13 files changed, 480 insertions, 87 deletions
diff --git a/dist/log.py b/dist/log.py
index 81dc8bd35d7..2da8e5eae66 100644
--- a/dist/log.py
+++ b/dist/log.py
@@ -10,10 +10,10 @@ tmp_file = '__tmp'
# Map log record types to:
# (C type, pack type, printf format, printf arg(s), list of setup functions)
field_types = {
- 'WT_LSN' : ('WT_LSN *', 'II', '%" PRIu32 "%" PRIu32 "',
- 'arg->l.file, arg->l.offset', [ '' ]),
- 'string' : ('const char *', 'S', '%s', 'arg', [ '' ]),
- 'item' : ('WT_ITEM *', 'u', '%s', 'escaped',
+ 'WT_LSN' : ('WT_LSN *', 'II', '[%" PRIu32 ", %" PRIu32 "]',
+ 'arg.l.file, arg.l.offset', [ '' ]),
+ 'string' : ('const char *', 'S', '\\"%s\\"', 'arg', [ '' ]),
+ 'item' : ('WT_ITEM *', 'u', '\\"%s\\"', 'escaped',
[ 'WT_ERR(__logrec_make_json_str(session, &escaped, &arg));',
'WT_ERR(__logrec_make_hex_str(session, &escaped, &arg));']),
'recno' : ('uint64_t', 'r', '%" PRIu64 "', 'arg', [ '' ]),
@@ -27,7 +27,7 @@ def cintype(f):
def couttype(f):
type = cintype(f)
# We already have a pointer to a WT_ITEM
- if f[0] == 'item':
+ if f[0] == 'item' or f[0] == 'WT_LSN':
return type
if type[-1] != '*':
type += ' '
@@ -35,8 +35,8 @@ def couttype(f):
def clocaltype(f):
type = cintype(f)
- # Allocate a WT_ITEM struct on the stack
- if f[0] == 'item':
+ # Allocate WT_ITEM and WT_LSN structs on the stack
+ if f[0] in ('item', 'WT_LSN'):
return type[:-2]
return type
@@ -62,10 +62,20 @@ def rec_pack_fmt(r):
def printf_fmt(f):
return field_types[f[0]][2]
+def pack_arg(f):
+ if f[0] == 'WT_LSN':
+ return '%s->l.file, %s->l.offset' % (f[1], f[1])
+ return f[1]
+
def printf_arg(f):
arg = field_types[f[0]][3].replace('arg', f[1])
return ' ' + arg
+def unpack_arg(f):
+ if f[0] == 'WT_LSN':
+ return '&%sp->l.file, &%sp->l.offset' % (f[1], f[1])
+ return f[1] + 'p'
+
def printf_setup(f, i, nl_indent):
stmt = field_types[f[0]][4][i].replace('arg', f[1])
return '' if stmt == '' else stmt + nl_indent
@@ -94,7 +104,7 @@ def printf_line(f, optype, i, ishex):
body = '%s%s(__wt_fprintf(session, WT_STDOUT(session),' % (
printf_setup(f, ishex, nl_indent),
'WT_ERR' if has_escape(optype.fields) else 'WT_RET') + \
- '%s "%s \\"%s\\": \\"%s\\"%s",%s));' % (
+ '%s "%s \\"%s\\": %s%s",%s));' % (
nl_indent, precomma, name, printf_fmt(f), postcomma,
printf_arg(f))
return ifbegin + body + ifend
@@ -201,13 +211,10 @@ __logrec_make_hex_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item)
# Emit code to read, write and print log operations (within a log record)
for optype in log_data.optypes:
- if not optype.fields:
- continue
-
tfile.write('''
int
__wt_logop_%(name)s_pack(
- WT_SESSION_IMPL *session, WT_ITEM *logrec,
+ WT_SESSION_IMPL *session, WT_ITEM *logrec%(comma)s
%(arg_decls)s)
{
\tconst char *fmt = WT_UNCHECKED_STRING(%(fmt)s);
@@ -216,14 +223,14 @@ __wt_logop_%(name)s_pack(
\toptype = %(macro)s;
\tWT_RET(__wt_struct_size(session, &size, fmt,
-\t optype, 0%(arg_names)s));
+\t optype, 0%(pack_args)s));
\t__wt_struct_size_adjust(session, &size);
\tWT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
\trecsize = (uint32_t)size;
\tWT_RET(__wt_struct_pack(session,
\t (uint8_t *)logrec->data + logrec->size, size, fmt,
-\t optype, recsize%(arg_names)s));
+\t optype, recsize%(pack_args)s));
\tlogrec->size += (uint32_t)size;
\treturn (0);
@@ -231,17 +238,18 @@ __wt_logop_%(name)s_pack(
''' % {
'name' : optype.name,
'macro' : optype.macro_name(),
+ 'comma' : ',' if optype.fields else '',
'arg_decls' : ', '.join(
'%s%s%s' % (cintype(f), '' if cintype(f)[-1] == '*' else ' ', f[1])
for f in optype.fields),
- 'arg_names' : ''.join(', %s' % f[1] for f in optype.fields),
- 'fmt' : op_pack_fmt(optype)
+ 'pack_args' : ''.join(', %s' % pack_arg(f) for f in optype.fields),
+ 'fmt' : op_pack_fmt(optype),
})
tfile.write('''
int
__wt_logop_%(name)s_unpack(
- WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end%(comma)s
%(arg_decls)s)
{
\tWT_DECL_RET;
@@ -249,7 +257,7 @@ __wt_logop_%(name)s_unpack(
\tuint32_t optype, size;
\tif ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
-\t &optype, &size%(arg_names)s)) != 0)
+\t &optype, &size%(unpack_args)s)) != 0)
\t\tWT_RET_MSG(session, ret, "logop_%(name)s: unpack failure");
\tWT_ASSERT(session, optype == %(macro)s);
@@ -259,42 +267,43 @@ __wt_logop_%(name)s_unpack(
''' % {
'name' : optype.name,
'macro' : optype.macro_name(),
+ 'comma' : ',' if optype.fields else '',
'arg_decls' : ', '.join(
'%s%sp' % (couttype(f), f[1]) for f in optype.fields),
- 'arg_names' : ''.join(', %sp' % f[1] for f in optype.fields),
+ 'unpack_args' : ''.join(', %s' % unpack_arg(f) for f in optype.fields),
'fmt' : op_pack_fmt(optype)
})
- last_field = optype.fields[-1]
tfile.write('''
int
__wt_logop_%(name)s_print(WT_SESSION_IMPL *session,
const uint8_t **pp, const uint8_t *end, uint32_t flags)
-{
-%(arg_ret)s\t%(arg_decls)s
+{%(arg_ret)s%(arg_decls)s
\t%(arg_unused)s%(arg_init)sWT_RET(__wt_logop_%(name)s_unpack(
\t session, pp, end%(arg_addrs)s));
\tWT_RET(__wt_fprintf(session, WT_STDOUT(session),
\t " \\"optype\\": \\"%(name)s\\",\\n"));
-\t%(print_args)s
+%(print_args)s
%(arg_fini)s
}
''' % {
'name' : optype.name,
- 'arg_ret' : ('\tWT_DECL_RET;\n' if has_escape(optype.fields) else ''),
- 'arg_decls' : ('\n\t'.join('%s%s%s;' %
+ 'arg_ret' : ('\n\tWT_DECL_RET;' if has_escape(optype.fields) else ''),
+ 'arg_decls' : (('\n\t' + '\n\t'.join('%s%s%s;' %
(clocaltype(f), '' if clocaltype(f)[-1] == '*' else ' ', f[1])
- for f in optype.fields)) + escape_decl(optype.fields),
+ for f in optype.fields)) + escape_decl(optype.fields)
+ if optype.fields else ''),
'arg_unused' : ('' if has_escape(optype.fields)
else 'WT_UNUSED(flags);\n\t'),
'arg_init' : ('escaped = NULL;\n\t' if has_escape(optype.fields) else ''),
'arg_fini' : ('\nerr:\t__wt_free(session, escaped);\n\treturn (ret);'
if has_escape(optype.fields) else '\treturn (0);'),
'arg_addrs' : ''.join(', &%s' % f[1] for f in optype.fields),
- 'print_args' : '\n\t'.join(printf_line(f, optype, i, s)
+ 'print_args' : ('\t' + '\n\t'.join(printf_line(f, optype, i, s)
for i,f in enumerate(optype.fields) for s in range(0, n_setup(f)))
+ if optype.fields else ''),
})
# Emit the printlog entry point
@@ -312,9 +321,6 @@ __wt_txn_op_printlog(WT_SESSION_IMPL *session,
\tswitch (optype) {''')
for optype in log_data.optypes:
- if not optype.fields:
- continue
-
tfile.write('''
\tcase %(macro)s:
\t\tWT_RET(%(print_func)s(session, pp, end, flags));
diff --git a/dist/log_data.py b/dist/log_data.py
index 1782d2aadf0..487657f10ce 100644
--- a/dist/log_data.py
+++ b/dist/log_data.py
@@ -41,9 +41,8 @@ rectypes = [
# Debugging message in the log
LogRecordType('message', 'message', [('string', 'message')]),
- # System record
- LogRecordType('system', 'system', [
- ('WT_LSN','prev_lsn'), ('item', 'unused')]),
+ # System (internal) log record
+ LogRecordType('system', 'system', []),
]
class LogOperationType:
@@ -61,6 +60,7 @@ class LogOperationType:
# never change after they're written in a log file.
#
optypes = [
+# commit operations
LogOperationType('col_put', 'column put',
[('uint32', 'fileid'), ('recno', 'recno'), ('item', 'value')]),
LogOperationType('col_remove', 'column remove',
@@ -74,4 +74,9 @@ optypes = [
LogOperationType('row_truncate', 'row truncate',
[('uint32', 'fileid'), ('item', 'start'), ('item', 'stop'),
('uint32', 'mode')]),
+
+# system operations
+ LogOperationType('checkpoint_start', 'checkpoint start', []),
+ LogOperationType('prev_lsn', 'previous LSN',
+ [('WT_LSN', 'prev_lsn')]),
]
diff --git a/dist/s_longlines b/dist/s_longlines
index bdb9811b4bd..736ea36e6d4 100755
--- a/dist/s_longlines
+++ b/dist/s_longlines
@@ -14,6 +14,7 @@ l=`(cd .. &&
-e '/include\/extern\.h/d' \
-e '/include\/extern_posix\.h/d' \
-e '/include\/extern_win\.h/d' \
+ -e '/log\/log_auto\.c/d' \
-e '/support\/stat\.c/d'`
for f in $l ; do
diff --git a/dist/s_string.ok b/dist/s_string.ok
index 4a4b60a9d79..d9e617ec42e 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -994,6 +994,7 @@ oo
opcode
opendir
openfile
+oplist
oplog
optimizations
optype
diff --git a/dist/s_whitespace b/dist/s_whitespace
index 874074dfb50..f4c96cdc91a 100755
--- a/dist/s_whitespace
+++ b/dist/s_whitespace
@@ -28,6 +28,7 @@ find bench dist examples ext src test \
-e '/checksum\/power8/d' \
-e '/3rdparty/d' \
-e '/docs\/tools/d' \
+ -e '/log\/log_auto/d' \
| while read f ; do
whitespace $f
done
diff --git a/src/include/extern.h b/src/include/extern.h
index 48886ee9dde..4d945d74cb2 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -362,6 +362,7 @@ extern int __wt_verbose_dump_cache(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_AT
extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_curstat_cache_walk(WT_SESSION_IMPL *session);
+extern int __wt_log_printf(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn);
extern int __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn);
@@ -406,6 +407,12 @@ extern int __wt_logop_row_remove_print(WT_SESSION_IMPL *session, const uint8_t *
extern int __wt_logop_row_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *start, WT_ITEM *stop, uint32_t mode) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_logop_row_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *startp, WT_ITEM *stopp, uint32_t *modep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_checkpoint_start_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_checkpoint_start_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_checkpoint_start_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_prev_lsn_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, WT_LSN *prev_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_prev_lsn_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_LSN *prev_lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_prev_lsn_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot);
extern int __wt_log_slot_switch(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index d7a28bc7380..93d9a3dc7d1 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -4503,6 +4503,10 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_LOGOP_ROW_REMOVE 5
/*! row truncate */
#define WT_LOGOP_ROW_TRUNCATE 6
+/*! checkpoint start */
+#define WT_LOGOP_CHECKPOINT_START 7
+/*! previous LSN */
+#define WT_LOGOP_PREV_LSN 8
/*! @} */
/*******************************************
diff --git a/src/log/log.c b/src/log/log.c
index 2a912e6568f..1460687d5af 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -20,11 +20,11 @@ static int __log_write_internal(
#define WT_LOG_OPEN_CREATE_OK 0x01
/*
- * __log_printf_internal --
- * Internal call to write a log message.
+ * __wt_log_printf --
+ * Write a text message to the log.
*/
-static int
-__log_printf_internal(WT_SESSION_IMPL *session, const char *fmt, ...)
+int
+__wt_log_printf(WT_SESSION_IMPL *session, const char *fmt, ...)
{
WT_DECL_RET;
va_list ap;
@@ -1259,7 +1259,7 @@ __wt_log_set_version(WT_SESSION_IMPL *session, uint16_t version,
* an archive correctly removes all earlier logs.
* Write an internal printf record.
*/
- WT_ERR(__log_printf_internal(session,
+ WT_ERR(__wt_log_printf(session,
"COMPATIBILITY: Version now %" PRIu16, log->log_version));
if (lognump != NULL)
*lognump = log->alloc_lsn.l.file;
diff --git a/src/log/log_auto.c b/src/log/log_auto.c
index c2b38184405..76df4ebd98f 100644
--- a/src/log/log_auto.c
+++ b/src/log/log_auto.c
@@ -149,9 +149,9 @@ __wt_logop_col_put_print(WT_SESSION_IMPL *session,
WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"optype\": \"col_put\",\n"));
WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
- " \"fileid\": \"%" PRIu32 "\",\n", fileid));
+ " \"fileid\": %" PRIu32 ",\n", fileid));
WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
- " \"recno\": \"%" PRIu64 "\",\n", recno));
+ " \"recno\": %" PRIu64 ",\n", recno));
WT_ERR(__logrec_make_json_str(session, &escaped, &value));
WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"value\": \"%s\"", escaped));
@@ -221,9 +221,9 @@ __wt_logop_col_remove_print(WT_SESSION_IMPL *session,
WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"optype\": \"col_remove\",\n"));
WT_RET(__wt_fprintf(session, WT_STDOUT(session),
- " \"fileid\": \"%" PRIu32 "\",\n", fileid));
+ " \"fileid\": %" PRIu32 ",\n", fileid));
WT_RET(__wt_fprintf(session, WT_STDOUT(session),
- " \"recno\": \"%" PRIu64 "\"", recno));
+ " \"recno\": %" PRIu64 "", recno));
return (0);
}
@@ -284,11 +284,11 @@ __wt_logop_col_truncate_print(WT_SESSION_IMPL *session,
WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"optype\": \"col_truncate\",\n"));
WT_RET(__wt_fprintf(session, WT_STDOUT(session),
- " \"fileid\": \"%" PRIu32 "\",\n", fileid));
+ " \"fileid\": %" PRIu32 ",\n", fileid));
WT_RET(__wt_fprintf(session, WT_STDOUT(session),
- " \"start\": \"%" PRIu64 "\",\n", start));
+ " \"start\": %" PRIu64 ",\n", start));
WT_RET(__wt_fprintf(session, WT_STDOUT(session),
- " \"stop\": \"%" PRIu64 "\"", stop));
+ " \"stop\": %" PRIu64 "", stop));
return (0);
}
@@ -351,7 +351,7 @@ __wt_logop_row_put_print(WT_SESSION_IMPL *session,
WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"optype\": \"row_put\",\n"));
WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
- " \"fileid\": \"%" PRIu32 "\",\n", fileid));
+ " \"fileid\": %" PRIu32 ",\n", fileid));
WT_ERR(__logrec_make_json_str(session, &escaped, &key));
WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"key\": \"%s\",\n", escaped));
@@ -431,7 +431,7 @@ __wt_logop_row_remove_print(WT_SESSION_IMPL *session,
WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"optype\": \"row_remove\",\n"));
WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
- " \"fileid\": \"%" PRIu32 "\",\n", fileid));
+ " \"fileid\": %" PRIu32 ",\n", fileid));
WT_ERR(__logrec_make_json_str(session, &escaped, &key));
WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"key\": \"%s\"", escaped));
@@ -505,7 +505,7 @@ __wt_logop_row_truncate_print(WT_SESSION_IMPL *session,
WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"optype\": \"row_truncate\",\n"));
WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
- " \"fileid\": \"%" PRIu32 "\",\n", fileid));
+ " \"fileid\": %" PRIu32 ",\n", fileid));
WT_ERR(__logrec_make_json_str(session, &escaped, &start));
WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
" \"start\": \"%s\",\n", escaped));
@@ -523,13 +523,129 @@ __wt_logop_row_truncate_print(WT_SESSION_IMPL *session,
" \"stop-hex\": \"%s\",\n", escaped));
}
WT_ERR(__wt_fprintf(session, WT_STDOUT(session),
- " \"mode\": \"%" PRIu32 "\"", mode));
+ " \"mode\": %" PRIu32 "", mode));
err: __wt_free(session, escaped);
return (ret);
}
int
+__wt_logop_checkpoint_start_pack(
+ WT_SESSION_IMPL *session, WT_ITEM *logrec
+ )
+{
+ const char *fmt = WT_UNCHECKED_STRING(II);
+ size_t size;
+ uint32_t optype, recsize;
+
+ optype = WT_LOGOP_CHECKPOINT_START;
+ WT_RET(__wt_struct_size(session, &size, fmt,
+ optype, 0));
+
+ __wt_struct_size_adjust(session, &size);
+ WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
+ recsize = (uint32_t)size;
+ WT_RET(__wt_struct_pack(session,
+ (uint8_t *)logrec->data + logrec->size, size, fmt,
+ optype, recsize));
+
+ logrec->size += (uint32_t)size;
+ return (0);
+}
+
+int
+__wt_logop_checkpoint_start_unpack(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end
+ )
+{
+ WT_DECL_RET;
+ const char *fmt = WT_UNCHECKED_STRING(II);
+ uint32_t optype, size;
+
+ if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
+ &optype, &size)) != 0)
+ WT_RET_MSG(session, ret, "logop_checkpoint_start: unpack failure");
+ WT_ASSERT(session, optype == WT_LOGOP_CHECKPOINT_START);
+
+ *pp += size;
+ return (0);
+}
+
+int
+__wt_logop_checkpoint_start_print(WT_SESSION_IMPL *session,
+ const uint8_t **pp, const uint8_t *end, uint32_t flags)
+{
+
+ WT_UNUSED(flags);
+ WT_RET(__wt_logop_checkpoint_start_unpack(
+ session, pp, end));
+
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
+ " \"optype\": \"checkpoint_start\",\n"));
+
+ return (0);
+}
+
+int
+__wt_logop_prev_lsn_pack(
+ WT_SESSION_IMPL *session, WT_ITEM *logrec,
+ WT_LSN *prev_lsn)
+{
+ const char *fmt = WT_UNCHECKED_STRING(IIII);
+ size_t size;
+ uint32_t optype, recsize;
+
+ optype = WT_LOGOP_PREV_LSN;
+ WT_RET(__wt_struct_size(session, &size, fmt,
+ optype, 0, prev_lsn->l.file, prev_lsn->l.offset));
+
+ __wt_struct_size_adjust(session, &size);
+ WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
+ recsize = (uint32_t)size;
+ WT_RET(__wt_struct_pack(session,
+ (uint8_t *)logrec->data + logrec->size, size, fmt,
+ optype, recsize, prev_lsn->l.file, prev_lsn->l.offset));
+
+ logrec->size += (uint32_t)size;
+ return (0);
+}
+
+int
+__wt_logop_prev_lsn_unpack(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
+ WT_LSN *prev_lsnp)
+{
+ WT_DECL_RET;
+ const char *fmt = WT_UNCHECKED_STRING(IIII);
+ uint32_t optype, size;
+
+ if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
+ &optype, &size, &prev_lsnp->l.file, &prev_lsnp->l.offset)) != 0)
+ WT_RET_MSG(session, ret, "logop_prev_lsn: unpack failure");
+ WT_ASSERT(session, optype == WT_LOGOP_PREV_LSN);
+
+ *pp += size;
+ return (0);
+}
+
+int
+__wt_logop_prev_lsn_print(WT_SESSION_IMPL *session,
+ const uint8_t **pp, const uint8_t *end, uint32_t flags)
+{
+ WT_LSN prev_lsn;
+
+ WT_UNUSED(flags);
+ WT_RET(__wt_logop_prev_lsn_unpack(
+ session, pp, end, &prev_lsn));
+
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
+ " \"optype\": \"prev_lsn\",\n"));
+ WT_RET(__wt_fprintf(session, WT_STDOUT(session),
+ " \"prev_lsn\": [%" PRIu32 ", %" PRIu32 "]", prev_lsn.l.file, prev_lsn.l.offset));
+ return (0);
+}
+
+int
__wt_txn_op_printlog(WT_SESSION_IMPL *session,
const uint8_t **pp, const uint8_t *end, uint32_t flags)
{
@@ -564,6 +680,14 @@ __wt_txn_op_printlog(WT_SESSION_IMPL *session,
WT_RET(__wt_logop_row_truncate_print(session, pp, end, flags));
break;
+ case WT_LOGOP_CHECKPOINT_START:
+ WT_RET(__wt_logop_checkpoint_start_print(session, pp, end, flags));
+ break;
+
+ case WT_LOGOP_PREV_LSN:
+ WT_RET(__wt_logop_prev_lsn_print(session, pp, end, flags));
+ break;
+
WT_ILLEGAL_VALUE(session);
}
diff --git a/src/log/log_sys.c b/src/log/log_sys.c
index 0f3cfdbc14b..4eb2a8e23d2 100644
--- a/src/log/log_sys.c
+++ b/src/log/log_sys.c
@@ -18,31 +18,28 @@ __wt_log_system_record(
{
WT_DECL_ITEM(logrec_buf);
WT_DECL_RET;
- WT_ITEM *dummy, empty;
WT_LOG *log;
WT_LOG_RECORD *logrec;
WT_LOGSLOT tmp;
WT_MYSLOT myslot;
- const char *fmt = WT_UNCHECKED_STRING(IIIU);
+ const char *fmt = WT_UNCHECKED_STRING(I);
uint32_t rectype = WT_LOGREC_SYSTEM;
size_t recsize;
log = S2C(session)->log;
WT_RET(__wt_logrec_alloc(session, log->allocsize, &logrec_buf));
memset((uint8_t *)logrec_buf->mem, 0, log->allocsize);
- WT_CLEAR(empty);
- dummy = &empty;
- /*
- * There is currently an unused portion of the system record for
- * future use. Send in a NULL entry.
- */
- WT_ERR(__wt_struct_size(session, &recsize, fmt, rectype,
- lsn->l.file, lsn->l.offset, dummy));
- WT_ASSERT(session, recsize <= log->allocsize);
+
+ WT_ERR(__wt_struct_size(session, &recsize, fmt, rectype));
WT_ERR(__wt_struct_pack(session,
(uint8_t *)logrec_buf->data + logrec_buf->size, recsize, fmt,
- rectype, lsn->l.file, lsn->l.offset, dummy));
+ rectype));
+ logrec_buf->size += recsize;
+ WT_ERR(__wt_logop_prev_lsn_pack(session, logrec_buf, lsn));
+ WT_ASSERT(session, logrec_buf->size <= log->allocsize);
+
logrec = (WT_LOG_RECORD *)logrec_buf->mem;
+
/*
* We know system records are this size. And we have to adjust
* the size now because we're not going through the normal log
@@ -50,9 +47,8 @@ __wt_log_system_record(
* earlier.
*/
logrec_buf->size = logrec->len = log->allocsize;
- /*
- * We do not compress nor encrypt this record.
- */
+
+ /* We do not compress nor encrypt this record. */
logrec->checksum = 0;
logrec->flags = 0;
__wt_log_record_byteswap(logrec);
@@ -82,16 +78,10 @@ __wt_log_recover_system(WT_SESSION_IMPL *session,
const uint8_t **pp, const uint8_t *end, WT_LSN *lsnp)
{
WT_DECL_RET;
- WT_ITEM unused;
- uint32_t prev_file, prev_offset;
- const char *fmt = WT_UNCHECKED_STRING(IIU);
- if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
- &prev_file, &prev_offset, &unused)) != 0)
+ if ((ret = __wt_logop_prev_lsn_unpack(session, pp, end, lsnp)) != 0)
WT_RET_MSG(session, ret,
"log_recover_prevlsn: unpack failure");
- if (lsnp != NULL)
- WT_SET_LSN(lsnp, prev_file, prev_offset);
- *pp = end;
+
return (0);
}
diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c
index abe4c085e50..d2fc42804a4 100644
--- a/src/txn/txn_log.c
+++ b/src/txn/txn_log.c
@@ -109,11 +109,11 @@ __txn_op_log(WT_SESSION_IMPL *session,
}
/*
- * __txn_commit_printlog --
- * Print a commit log record.
+ * __txn_oplist_printlog --
+ * Print a list of operations from a log record.
*/
static int
-__txn_commit_printlog(WT_SESSION_IMPL *session,
+__txn_oplist_printlog(WT_SESSION_IMPL *session,
const uint8_t **pp, const uint8_t *end, uint32_t flags)
{
bool firstrecord;
@@ -344,8 +344,8 @@ __wt_txn_checkpoint_log(
WT_TXN *txn;
uint8_t *end, *p;
size_t recsize;
- uint32_t i, rectype = WT_LOGREC_CHECKPOINT;
- const char *fmt = WT_UNCHECKED_STRING(IIIIu);
+ uint32_t i, rectype;
+ const char *fmt;
conn = S2C(session);
txn = &session->txn;
@@ -367,7 +367,31 @@ __wt_txn_checkpoint_log(
switch (flags) {
case WT_TXN_LOG_CKPT_PREPARE:
txn->full_ckpt = true;
- WT_ERR(__wt_log_flush_lsn(session, ckpt_lsn, true));
+
+ if (conn->compat_major >= WT_LOG_V2) {
+ /*
+ * Write the system log record containing a checkpoint
+ * start operation.
+ */
+ rectype = WT_LOGREC_SYSTEM;
+ fmt = WT_UNCHECKED_STRING(I);
+ WT_ERR(__wt_struct_size(
+ session, &recsize, fmt, rectype));
+ WT_ERR(__wt_logrec_alloc(session, recsize, &logrec));
+
+ WT_ERR(__wt_struct_pack(session,
+ (uint8_t *)logrec->data + logrec->size, recsize,
+ fmt, rectype));
+ logrec->size += (uint32_t)recsize;
+ WT_ERR(__wt_logop_checkpoint_start_pack(
+ session, logrec));
+ WT_ERR(__wt_log_write(session, logrec, ckpt_lsn, 0));
+ } else {
+ WT_ERR(__wt_log_printf(session,
+ "CHECKPOINT: Starting record"));
+ WT_ERR(__wt_log_flush_lsn(session, ckpt_lsn, true));
+ }
+
/*
* We need to make sure that the log records in the checkpoint
* LSN are on disk. In particular to make sure that the
@@ -401,14 +425,16 @@ __wt_txn_checkpoint_log(
ckpt_snapshot = txn->ckpt_snapshot;
/* Write the checkpoint log record. */
- WT_ERR(__wt_struct_size(session, &recsize, fmt,
- rectype, ckpt_lsn->l.file, ckpt_lsn->l.offset,
+ rectype = WT_LOGREC_CHECKPOINT;
+ fmt = WT_UNCHECKED_STRING(IIIIu);
+ WT_ERR(__wt_struct_size(session, &recsize,
+ fmt, rectype, ckpt_lsn->l.file, ckpt_lsn->l.offset,
txn->ckpt_nsnapshot, ckpt_snapshot));
WT_ERR(__wt_logrec_alloc(session, recsize, &logrec));
WT_ERR(__wt_struct_pack(session,
- (uint8_t *)logrec->data + logrec->size, recsize, fmt,
- rectype, ckpt_lsn->l.file, ckpt_lsn->l.offset,
+ (uint8_t *)logrec->data + logrec->size, recsize,
+ fmt, rectype, ckpt_lsn->l.file, ckpt_lsn->l.offset,
txn->ckpt_nsnapshot, ckpt_snapshot));
logrec->size += (uint32_t)recsize;
WT_ERR(__wt_log_write(session, logrec, lsnp,
@@ -568,7 +594,7 @@ __txn_printlog(WT_SESSION_IMPL *session,
" \"type\" : \"commit\",\n"));
WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"txnid\" : %" PRIu64 ",\n", txnid));
- WT_RET(__txn_commit_printlog(session, &p, end, args->flags));
+ WT_RET(__txn_oplist_printlog(session, &p, end, args->flags));
break;
case WT_LOGREC_FILE_SYNC:
@@ -596,9 +622,7 @@ __txn_printlog(WT_SESSION_IMPL *session,
WT_UNCHECKED_STRING(II), &lsnfile, &lsnoffset));
WT_RET(__wt_fprintf(session, WT_STDOUT(session),
" \"type\" : \"system\",\n"));
- WT_RET(__wt_fprintf(session, WT_STDOUT(session),
- " \"prev_lsn\" : [%" PRIu32 ",%" PRIu32 "]\n",
- lsnfile, lsnoffset));
+ WT_RET(__txn_oplist_printlog(session, &p, end, args->flags));
break;
}
diff --git a/test/suite/test_compat01.py b/test/suite/test_compat01.py
index 460d38d5e08..1c9f07c02f2 100644
--- a/test/suite/test_compat01.py
+++ b/test/suite/test_compat01.py
@@ -102,7 +102,7 @@ class test_compat01(wttest.WiredTigerTestCase, suite_subprocess):
contains = 0
with open('printlog.out') as logfile:
for line in logfile:
- if 'prev_lsn' in line:
+ if 'optype' in line and 'prev_lsn' in line:
contains += 1
self.assertEqual(prev_lsn_count, contains)
diff --git a/test/suite/test_timestamp03.py b/test/suite/test_timestamp03.py
new file mode 100644
index 00000000000..5b42896fdcd
--- /dev/null
+++ b/test/suite/test_timestamp03.py
@@ -0,0 +1,230 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2017 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_timestamp03.py
+# Timestamps: checkpoints
+#
+
+from helper import copy_wiredtiger_home
+import random
+from suite_subprocess import suite_subprocess
+import wiredtiger, wttest
+from wtscenario import make_scenarios
+
+def timestamp_str(t):
+ return '%x' % t
+
+def timestamp_ret_str(t):
+ s = timestamp_str(t)
+ if len(s) % 2 == 1:
+ s = '0' + s
+ return s
+
+class test_timestamp03(wttest.WiredTigerTestCase, suite_subprocess):
+ tablename = 'ts03_ts_nologged'
+ tablename2 = 'ts03_nots_logged'
+ tablename3 = 'ts03_ts_logged'
+
+ types = [
+ ('file', dict(uri='file:', use_cg=False, use_index=False)),
+ ('lsm', dict(uri='lsm:', use_cg=False, use_index=False)),
+ ('table-cg', dict(uri='table:', use_cg=True, use_index=False)),
+ ('table-index', dict(uri='table:', use_cg=False, use_index=True)),
+ ('table-simple', dict(uri='table:', use_cg=False, use_index=False)),
+ ]
+
+ conncfg = [
+ ('nolog', dict(conncfg='create')),
+ ('V1', dict(conncfg='create,log=(enabled),compatibility=(release="2.9")')),
+ ('V2', dict(conncfg='create,log=(enabled)')),
+ ]
+
+ ckpt = [
+ ('read_ts', dict(ckptcfg='read_timestamp', val='none')),
+ ]
+
+ scenarios = make_scenarios(types, ckpt, conncfg)
+
+ # Binary values.
+ value = u'\u0001\u0002abcd\u0003\u0004'
+ value2 = u'\u0001\u0002dcba\u0003\u0004'
+
+ def conn_config(self):
+ return self.conncfg
+
+ # Check that a cursor (optionally started in a new transaction), sees the
+ # expected values.
+ def check(self, session, txn_config, expected):
+ if txn_config:
+ session.begin_transaction(txn_config)
+ c = session.open_cursor(self.uri + self.tablename, None)
+ actual = dict((k, v) for k, v in c if v != 0)
+ self.assertEqual(actual, expected)
+ # Search for the expected items as well as iterating
+ for k, v in expected.iteritems():
+ self.assertEqual(c[k], v, "for key " + str(k))
+ c.close()
+ if txn_config:
+ session.commit_transaction()
+
+ # Check that a cursor sees the expected values after a checkpoint.
+ def ckpt_backup(self, valcnt, valcnt2, valcnt3):
+ newdir = "BACKUP"
+
+ # Take a checkpoint. Make a copy of the database. Open the
+ # copy and verify whether or not the expected data is in there.
+ self.pr("CKPT: " + self.ckptcfg)
+ ckptcfg = self.ckptcfg
+ if ckptcfg == 'read_timestamp':
+ ckptcfg = self.ckptcfg + '=' + self.oldts
+ # print "CKPT: " + ckptcfg
+ self.session.checkpoint(ckptcfg)
+ copy_wiredtiger_home('.', newdir, True)
+
+ conn = self.setUpConnectionOpen(newdir)
+ session = self.setUpSessionOpen(conn)
+ c = session.open_cursor(self.uri + self.tablename, None)
+ c2 = session.open_cursor(self.uri + self.tablename2, None)
+ c3 = session.open_cursor(self.uri + self.tablename3, None)
+ # Count how many times the second value is present
+ count = 0
+ for k, v in c:
+ if self.value2 in str(v):
+ # print "value2 found in key " + str(k)
+ count += 1
+ # else:
+ # print "Non-value2: key " + str(k)
+ c.close()
+ # Count how many times the second value is present in the
+ # non-timestamp table.
+ count2 = 0
+ for k, v in c2:
+ if self.value2 in str(v):
+ # print "value2 found in key " + str(k)
+ count2 += 1
+ # else:
+ # print "Non-value2: key " + str(k)
+ c2.close()
+ # Count how many times the second value is present in the
+ # logged timestamp table.
+ count3 = 0
+ for k, v in c3:
+ if self.value2 in str(v):
+ # print "value2 found in key " + str(k)
+ count3 += 1
+ # else:
+ # print "Non-value2: key " + str(k)
+ c3.close()
+ conn.close()
+ # print "CHECK BACKUP: Count " + str(count) + " Count2 " + str(count2) + " Count3 " + str(count3)
+ # print "CHECK BACKUP: Expect value2 count " + str(valcnt)
+ # print "CHECK BACKUP: 2nd table Expect value2 count " + str(valcnt2)
+ # print "CHECK BACKUP: 3rd table Expect value2 count " + str(valcnt3)
+ # print "CHECK BACKUP: config " + str(self.ckptcfg)
+ self.assertEqual(count, valcnt)
+ self.assertEqual(count2, valcnt2)
+ self.assertEqual(count3, valcnt3)
+
+ def test_timestamp03(self):
+ if not wiredtiger.timestamp_build():
+ self.skipTest('requires a timestamp build')
+
+ uri = self.uri + self.tablename
+ uri2 = self.uri + self.tablename2
+ uri3 = self.uri + self.tablename3
+ #
+ # Open three tables:
+ # 1. Table is not logged and uses timestamps.
+ # 2. Table is logged and does not use timestamps.
+ # 3. Table is logged and uses timestamps.
+ #
+ self.session.create(uri, 'key_format=i,value_format=S,log=(enabled=false)')
+ c = self.session.open_cursor(uri)
+ self.session.create(uri2, 'key_format=i,value_format=S')
+ c2 = self.session.open_cursor(uri2)
+ self.session.create(uri3, 'key_format=i,value_format=S')
+ c3 = self.session.open_cursor(uri3)
+
+ # Insert keys 1..100 each with timestamp=key, in some order
+ nkeys = 100
+ orig_keys = range(1, nkeys+1)
+ keys = orig_keys[:]
+ random.shuffle(keys)
+
+ for k in keys:
+ c2[k] = self.value
+ self.session.begin_transaction()
+ c[k] = self.value
+ c3[k] = self.value
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(k))
+
+ # Now check that we see the expected state when reading at each
+ # timestamp
+ for i, t in enumerate(orig_keys):
+ self.check(self.session, 'read_timestamp=' + timestamp_str(t),
+ dict((k, self.value) for k in orig_keys[:i+1]))
+
+ # Bump the oldest timestamp, we're not going back...
+ self.assertEqual(self.conn.query_timestamp(), timestamp_ret_str(100))
+ self.oldts = timestamp_str(100)
+ self.conn.set_timestamp('oldest_timestamp=' + self.oldts)
+ # print "Oldest " + self.oldts
+
+ # Update them and retry.
+ random.shuffle(keys)
+ count = 0
+ for k in keys:
+ # Make sure a timestamp cursor is the last one to update. This
+ # tests the scenario for a bug we found where recovery replayed
+ # the last record written into the log.
+ #
+ # print "Key " + str(k) + " to value2"
+ c2[k] = self.value2
+ self.session.begin_transaction()
+ c[k] = self.value2
+ c3[k] = self.value2
+ ts = timestamp_str(k + 101)
+ self.session.commit_transaction('commit_timestamp=' + ts)
+ # print "Commit key " + str(k) + " ts " + ts
+ count += 1
+ # print "Updated " + str(count) + " keys to value2"
+
+ # Take a checkpoint using the given configuration. Then verify
+ # whether value2 appears in a copy of that data or not.
+ if self.val == 'all':
+ valcnt = nkeys
+ else:
+ valcnt = 0
+ # Table 2 should always see all the keys
+ # Table 3 should see whatever table 1 sees.
+ valcnt2 = nkeys
+ valcnt3 = valcnt
+ self.ckpt_backup(valcnt, valcnt2, valcnt3)
+
+if __name__ == '__main__':
+ wttest.run()