diff options
author | Sulabh Mahajan <sulabh.mahajan@mongodb.com> | 2017-07-24 09:56:58 +1000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-07-24 09:56:58 +1000 |
commit | 71c66de3023f2a73f51cd6c9ee688a2a674193d6 (patch) | |
tree | b8fbe44247c6ea9485870f65ef560a01972d0c61 | |
parent | 311b718ccd1eefffc9f056a1524173ce46ac955d (diff) | |
download | mongo-71c66de3023f2a73f51cd6c9ee688a2a674193d6.tar.gz |
WT-3380 Make 8-byte timestamps a special case (#3509)
* Change wt_timestamp_t to union of uint64 and uint8 array
* Add contents from Keith's change
* Whitespace and s_string ok addition
* Get rid of WT_GET_TIMESTAMP_PTR
* Fix the change after the merge
* Remove superfluous struct around union, simplify macros
* Remove packed attribute from WT_UPDATE. (#3523)
* Remove packed attribute from WT_UPDATE.
Add a 3-byte data array declaration at the end of the WT_UPDATE structure. That
way we don't have to pack the structure to avoid wasting data bytes,
and we don't have to use a macro to identify the start of the data.
* Locate the timestamp in the WT_UPDATE structure depending on its
alignment, to avoid padding.
* Restore a lost change: the size of the WT_UPDATE structure has to reflect the
size of the timestamp.
* Change the __wt_timestamp_t union into a structure so the compiler doesn't
insert padding in the middle of the WT_UPDATE structure (the existence of
the uint64_t in the union causes the whole thing to be aligned, even if we
never access it).
Incorporate Michael's change to replace sizeof(WT_UPDATE) with WT_UPDATE_SIZE:
the compiler is padding the structure at the end and we need to ignore that.
* If there's no union, we can't reach into it and get the field; take
the address like we do everywhere else.
* Simplify size calculations for WT_UPDATE.
In particular, go back to having WT_UPDATE_SIZE match the size of a
WT_UPDATE excluding the payload data. That means the declared size of
the data array in WT_UPDATE is no longer special.
* Switch from a 3-byte array to a C99 flexible array member.
There is no longer anything special about the 3 byte array: since the
timestamp can be any size, there is no guarantee it makes a WT_UPDATE
any nicely rounded size. Compilers enforce some rules around how
flexible array members can be used: we should consider switching our
other uses of structs with trailing arrays (in a separate ticket).
* Remove some more unnecessary casts now that upd->data is typed.
-rw-r--r-- | build_posix/configure.ac.in | 2 | ||||
-rw-r--r-- | dist/s_string.ok | 2 | ||||
-rwxr-xr-x | dist/s_style | 5 | ||||
-rw-r--r-- | src/btree/bt_curnext.c | 10 | ||||
-rw-r--r-- | src/btree/bt_curprev.c | 10 | ||||
-rw-r--r-- | src/btree/bt_debug.c | 24 | ||||
-rw-r--r-- | src/btree/bt_delete.c | 12 | ||||
-rw-r--r-- | src/btree/bt_ovfl.c | 2 | ||||
-rw-r--r-- | src/btree/bt_read.c | 2 | ||||
-rw-r--r-- | src/btree/bt_ret.c | 2 | ||||
-rw-r--r-- | src/btree/row_modify.c | 6 | ||||
-rw-r--r-- | src/conn/conn_sweep.c | 2 | ||||
-rw-r--r-- | src/docs/transactions.dox | 2 | ||||
-rw-r--r-- | src/evict/evict_page.c | 2 | ||||
-rw-r--r-- | src/include/btmem.h | 31 | ||||
-rw-r--r-- | src/include/btree.i | 4 | ||||
-rw-r--r-- | src/include/cursor.i | 2 | ||||
-rw-r--r-- | src/include/extern.h | 2 | ||||
-rw-r--r-- | src/include/misc.h | 22 | ||||
-rw-r--r-- | src/include/txn.i | 90 | ||||
-rw-r--r-- | src/include/verify_build.h | 10 | ||||
-rw-r--r-- | src/include/wt_internal.h | 2 | ||||
-rw-r--r-- | src/reconcile/rec_write.c | 56 | ||||
-rw-r--r-- | src/txn/txn.c | 30 | ||||
-rw-r--r-- | src/txn/txn_ckpt.c | 2 | ||||
-rw-r--r-- | src/txn/txn_log.c | 2 | ||||
-rw-r--r-- | src/txn/txn_timestamp.c | 136 |
27 files changed, 301 insertions, 171 deletions
diff --git a/build_posix/configure.ac.in b/build_posix/configure.ac.in index 4de12d5161e..d502b9932cb 100644 --- a/build_posix/configure.ac.in +++ b/build_posix/configure.ac.in @@ -75,7 +75,7 @@ AC_SUBST(AM_LIBTOOLFLAGS) # WiredTiger uses anonymous unions to pad structures. It's part of C11, but # some compilers require -std=c11 to support them. Turn on that flag for any # compiler that supports it, except for Solaris, where gcc -std=c11 makes -# some none-C11 prototypes unavailable. +# some non-C11 prototypes unavailable. if test "$wt_cv_solaris" = "no"; then AX_CHECK_COMPILE_FLAG([-std=c11], [AM_CFLAGS="$AM_CFLAGS -std=c11"]) fi diff --git a/dist/s_string.ok b/dist/s_string.ok index 723f6d18858..4a4b60a9d79 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -350,6 +350,8 @@ Syscall TAILQ TCMalloc TESTUTIL +TIMESTAMP +TIMESTAMPS TODO TORTIOUS TSO diff --git a/dist/s_style b/dist/s_style index 69cf1f667fa..d5dc31ba37a 100755 --- a/dist/s_style +++ b/dist/s_style @@ -56,6 +56,11 @@ else cat $t fi + if grep 'sizeof(WT_UPDATE)' $f > $t; then + echo "$f: Use WT_UPDATE_SIZE rather than sizeof(WT_UPDATE)" + cat $t + fi + if ! 
expr "$f" : 'src/include/queue\.h' > /dev/null && egrep 'STAILQ_|SLIST_|\bLIST_' $f ; then echo "$f: use TAILQ for all lists" diff --git a/src/btree/bt_curnext.c b/src/btree/bt_curnext.c index cb59bff8f75..c20d0af0d69 100644 --- a/src/btree/bt_curnext.c +++ b/src/btree/bt_curnext.c @@ -61,7 +61,7 @@ __cursor_fix_append_next(WT_CURSOR_BTREE *cbt, bool newpage) cbt->v = 0; val->data = &cbt->v; } else - val->data = WT_UPDATE_DATA(upd); + val->data = upd->data; val->size = 1; return (0); } @@ -110,7 +110,7 @@ new_page: cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt); val->data = &cbt->v; } else - val->data = WT_UPDATE_DATA(upd); + val->data = upd->data; val->size = 1; return (0); } @@ -147,7 +147,7 @@ new_page: if (cbt->ins == NULL) ++cbt->page_deleted_count; continue; } - val->data = WT_UPDATE_DATA(upd); + val->data = upd->data; val->size = upd->size; return (0); } @@ -211,7 +211,7 @@ new_page: /* Find the matching WT_COL slot. */ continue; } - val->data = WT_UPDATE_DATA(upd); + val->data = upd->data; val->size = upd->size; return (0); } @@ -332,7 +332,7 @@ new_insert: if ((ins = cbt->ins) != NULL) { } key->data = WT_INSERT_KEY(ins); key->size = WT_INSERT_KEY_SIZE(ins); - val->data = WT_UPDATE_DATA(upd); + val->data = upd->data; val->size = upd->size; return (0); } diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c index 6e49f4df68c..36a4ca57419 100644 --- a/src/btree/bt_curprev.c +++ b/src/btree/bt_curprev.c @@ -207,7 +207,7 @@ __cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage) cbt->v = 0; val->data = &cbt->v; } else - val->data = WT_UPDATE_DATA(upd); + val->data = upd->data; val->size = 1; return (0); } @@ -256,7 +256,7 @@ new_page: cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt); val->data = &cbt->v; } else - val->data = WT_UPDATE_DATA(upd); + val->data = upd->data; val->size = 1; return (0); } @@ -293,7 +293,7 @@ new_page: if (cbt->ins == NULL) ++cbt->page_deleted_count; continue; } - val->data = 
WT_UPDATE_DATA(upd); + val->data = upd->data; val->size = upd->size; return (0); } @@ -358,7 +358,7 @@ new_page: if (cbt->recno < cbt->ref->ref_recno) continue; } - val->data = WT_UPDATE_DATA(upd); + val->data = upd->data; val->size = upd->size; return (0); } @@ -489,7 +489,7 @@ new_insert: if ((ins = cbt->ins) != NULL) { } key->data = WT_INSERT_KEY(ins); key->size = WT_INSERT_KEY_SIZE(ins); - val->data = WT_UPDATE_DATA(upd); + val->data = upd->data; val->size = upd->size; return (0); } diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index c0aaf3f42d9..69b7fd9bdca 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -999,31 +999,25 @@ __debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte) WT_RET(ds->f(ds, "\tvalue {reserved}\n")); else if (hexbyte) { WT_RET(ds->f(ds, "\t{")); - WT_RET(__debug_hex_byte(ds, - *(uint8_t *)WT_UPDATE_DATA(upd))); + WT_RET(__debug_hex_byte(ds, *upd->data)); WT_RET(ds->f(ds, "}\n")); } else - WT_RET(__debug_item(ds, - "value", WT_UPDATE_DATA(upd), upd->size)); + WT_RET(__debug_item(ds, "value", upd->data, upd->size)); WT_RET(ds->f(ds, "\t" "txn id %" PRIu64, upd->txnid)); #ifdef HAVE_TIMESTAMPS - if (!__wt_timestamp_iszero(upd->timestamp)) { + if (!__wt_timestamp_iszero( + WT_TIMESTAMP_NULL(&upd->timestamp))) { #if WT_TIMESTAMP_SIZE == 8 - { - uint64_t ts; - __wt_timestamp_set( - (uint8_t *)&ts, (uint8_t *)&upd->timestamp[0]); - ts = __wt_bswap64(ts); - WT_RET(ds->f(ds, ", stamp %" PRIu64, ts)); - } + WT_RET(ds->f(ds, + ", stamp %" PRIu64, upd->timestamp.val)); #else - { int i; + WT_RET(ds->f(ds, ", stamp 0x")); for (i = 0; i < WT_TIMESTAMP_SIZE; ++i) - WT_RET(ds->f(ds, "%" PRIx8, upd->timestamp[i])); - } + WT_RET(ds->f(ds, + "%" PRIx8, upd->timestamp.ts[i])); #endif } #endif diff --git a/src/btree/bt_delete.c b/src/btree/bt_delete.c index 0e5cfcec7a7..7bf471e7cf5 100644 --- a/src/btree/bt_delete.c +++ b/src/btree/bt_delete.c @@ -245,10 +245,10 @@ __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool 
visible_all) return (false); skip = ref->page_del == NULL || (visible_all ? - __wt_txn_visible_all(session, - ref->page_del->txnid, WT_GET_TIMESTAMP(ref->page_del)): - __wt_txn_visible(session, - ref->page_del->txnid, WT_GET_TIMESTAMP(ref->page_del))); + __wt_txn_visible_all(session, ref->page_del->txnid, + WT_TIMESTAMP_NULL(&ref->page_del->timestamp)): + __wt_txn_visible(session, ref->page_del->txnid, + WT_TIMESTAMP_NULL(&ref->page_del->timestamp))); /* * The page_del structure can be freed as soon as the delete is stable: @@ -257,8 +257,8 @@ __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all) * no longer need synchronization to check the ref. */ if (skip && ref->page_del != NULL && (visible_all || - __wt_txn_visible_all(session, - ref->page_del->txnid, WT_GET_TIMESTAMP(ref->page_del)))) { + __wt_txn_visible_all(session, ref->page_del->txnid, + WT_TIMESTAMP_NULL(&ref->page_del->timestamp)))) { __wt_free(session, ref->page_del->update_list); __wt_free(session, ref->page_del); } diff --git a/src/btree/bt_ovfl.c b/src/btree/bt_ovfl.c index a0b1ff65006..596446d9b8b 100644 --- a/src/btree/bt_ovfl.c +++ b/src/btree/bt_ovfl.c @@ -78,7 +78,7 @@ __wt_ovfl_read(WT_SESSION_IMPL *session, break; } WT_ASSERT(session, i < track->remove_next); - store->data = WT_UPDATE_DATA(upd); + store->data = upd->data; store->size = upd->size; } else ret = __ovfl_read(session, unpack->data, unpack->size, store); diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index 6a89f505c31..5302c8b0de8 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -194,7 +194,7 @@ __las_page_instantiate(WT_SESSION_IMPL *session, upd->txnid = upd_txnid; #ifdef HAVE_TIMESTAMPS WT_ASSERT(session, las_timestamp.size == WT_TIMESTAMP_SIZE); - __wt_timestamp_set(upd->timestamp, las_timestamp.data); + __wt_timestamp_set(&upd->timestamp, las_timestamp.data); #endif switch (page->type) { diff --git a/src/btree/bt_ret.c b/src/btree/bt_ret.c index 7212de72d6e..5921ad94b6d 100644 --- 
a/src/btree/bt_ret.c +++ b/src/btree/bt_ret.c @@ -95,7 +95,7 @@ __value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) /* If the cursor references a WT_UPDATE item, return it. */ if (upd != NULL) { - cursor->value.data = WT_UPDATE_DATA(upd); + cursor->value.data = upd->data; cursor->value.size = upd->size; return (0); } diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c index e2d19bf705b..4099069d510 100644 --- a/src/btree/row_modify.c +++ b/src/btree/row_modify.c @@ -268,13 +268,13 @@ __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, */ if (modify_type == WT_UPDATE_DELETED || modify_type == WT_UPDATE_RESERVED) - WT_RET(__wt_calloc(session, 1, sizeof(WT_UPDATE), &upd)); + WT_RET(__wt_calloc(session, 1, WT_UPDATE_SIZE, &upd)); else { WT_RET(__wt_calloc( - session, 1, sizeof(WT_UPDATE) + value->size, &upd)); + session, 1, WT_UPDATE_SIZE + value->size, &upd)); if (value->size != 0) { upd->size = WT_STORE_SIZE(value->size); - memcpy(WT_UPDATE_DATA(upd), value->data, value->size); + memcpy(upd->data, value->data, value->size); } } upd->type = (uint8_t)modify_type; diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c index 592d66b5294..7236735715f 100644 --- a/src/conn/conn_sweep.c +++ b/src/conn/conn_sweep.c @@ -85,7 +85,7 @@ __sweep_expire_one(WT_SESSION_IMPL *session) /* Only sweep clean trees where all updates are visible. */ if (btree->modified || !__wt_txn_visible_all(session, - btree->rec_max_txn, WT_TIMESTAMP(btree->rec_max_timestamp))) + btree->rec_max_txn, WT_TIMESTAMP_NULL(&btree->rec_max_timestamp))) goto err; /* diff --git a/src/docs/transactions.dox b/src/docs/transactions.dox index 8a05de9b5f5..64d1c701acc 100644 --- a/src/docs/transactions.dox +++ b/src/docs/transactions.dox @@ -164,7 +164,7 @@ timestamp size is 8 bytes (i.e., 64 bits). Setting a size of zero disables transaction timestamp functionality. 
Applications can assign explicit commit timestamps to transactions, then read -"as of" a timestamp. Timestamps and are communicated to WiredTiger using a +"as of" a timestamp. Timestamps are communicated to WiredTiger using a lower case hexadecimal encoding, so the encoded value can be twice as long as the raw timestamp value. diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index a12590dedbc..03d6e9ab503 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -597,7 +597,7 @@ __evict_review( LF_ISSET(WT_EVICT_LOOKASIDE) || F_ISSET(S2BT(session), WT_BTREE_LOOKASIDE) || __wt_txn_visible_all(session, page->modify->rec_max_txn, - WT_TIMESTAMP(page->modify->rec_max_timestamp))); + WT_TIMESTAMP_NULL(&page->modify->rec_max_timestamp))); return (0); } diff --git a/src/include/btmem.h b/src/include/btmem.h index 37501a484b0..b694930c8fd 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -885,9 +885,11 @@ struct __wt_ikey { * is done for an entry, WT_UPDATE structures are formed into a forward-linked * list. */ -WT_PACKED_STRUCT_BEGIN(__wt_update) - volatile uint64_t txnid; /* Transaction ID */ - WT_DECL_TIMESTAMP(timestamp) +struct __wt_update { + volatile uint64_t txnid; /* transaction ID */ +#if WT_TIMESTAMP_SIZE == 8 + WT_DECL_TIMESTAMP(timestamp) /* aligned uint64_t timestamp */ +#endif WT_UPDATE *next; /* forward-linked list */ @@ -898,13 +900,20 @@ WT_PACKED_STRUCT_BEGIN(__wt_update) #define WT_UPDATE_RESERVED 2 uint8_t type; /* type (one byte to conserve memory) */ - /* The update includes a complete value. */ + /* If the update includes a complete value. */ #define WT_UPDATE_DATA_VALUE(upd) \ ((upd)->type == WT_UPDATE_STANDARD || (upd)->type == WT_UPDATE_DELETED) - /* The untyped value immediately follows the WT_UPDATE structure. 
*/ -#define WT_UPDATE_DATA(upd) \ - ((void *)((uint8_t *)(upd) + sizeof(WT_UPDATE))) +#if WT_TIMESTAMP_SIZE != 8 + WT_DECL_TIMESTAMP(timestamp) /* unaligned uint8_t array timestamp */ +#endif + + /* + * Zero or more bytes of value (the payload) immediately follows the + * WT_UPDATE structure. We use a C99 flexible array member which has + * the semantics we want. + */ + uint8_t data[]; /* start of the data */ /* * The memory size of an update: include some padding because this is @@ -912,12 +921,12 @@ WT_PACKED_STRUCT_BEGIN(__wt_update) * cache overhead calculation. */ #define WT_UPDATE_MEMSIZE(upd) \ - WT_ALIGN(sizeof(WT_UPDATE) + (upd)->size, 32) -WT_PACKED_STRUCT_END + WT_ALIGN(WT_UPDATE_SIZE + (upd)->size, 32) +}; /* - * WT_UPDATE_SIZE is the expected structure size -- we verify the build to - * ensure the compiler hasn't inserted padding. + * WT_UPDATE_SIZE is the expected structure size excluding the payload data -- + * we verify the build to ensure the compiler hasn't inserted padding. */ #define WT_UPDATE_SIZE (21 + WT_TIMESTAMP_SIZE) diff --git a/src/include/btree.i b/src/include/btree.i index 305de509424..8c9ac576fe3 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1341,8 +1341,8 @@ __wt_page_can_evict( * If the page is clean but has modifications that appear too new to * evict, skip it. */ - if (!modified && !__wt_txn_visible_all( - session, mod->rec_max_txn, WT_TIMESTAMP(mod->rec_max_timestamp))) + if (!modified && !__wt_txn_visible_all(session, + mod->rec_max_txn, WT_TIMESTAMP_NULL(&mod->rec_max_timestamp))) return (false); return (true); diff --git a/src/include/cursor.i b/src/include/cursor.i index 75fd935fc91..d94362a424c 100644 --- a/src/include/cursor.i +++ b/src/include/cursor.i @@ -442,7 +442,7 @@ value: * (if any) is visible. 
*/ if (upd != NULL) { - vb->data = WT_UPDATE_DATA(upd); + vb->data = upd->data; vb->size = upd->size; return (0); } diff --git a/src/include/extern.h b/src/include/extern.h index 9b47542b494..48886ee9dde 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -786,7 +786,7 @@ extern int __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM extern int __wt_txn_named_snapshot_config(WT_SESSION_IMPL *session, const char *cfg[], bool *has_create, bool *has_drops) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_txn_named_snapshot_destroy(WT_SESSION_IMPL *session); extern int __wt_txn_recover(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name, uint8_t *timestamp, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name, wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_global_query_timestamp( WT_SESSION_IMPL *session, char *hex_timestamp, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/include/misc.h b/src/include/misc.h index 838086c2ced..bf7d36e19ca 100644 --- a/src/include/misc.h +++ b/src/include/misc.h @@ -251,22 +251,28 @@ /* Timestamp type and helper macros. 
*/ #if WT_TIMESTAMP_SIZE > 0 -#define HAVE_TIMESTAMPS 1 +#define HAVE_TIMESTAMPS #else -#undef HAVE_TIMESTAMPS +#undef HAVE_TIMESTAMPS #endif #ifdef HAVE_TIMESTAMPS -#define WT_TIMESTAMP(x) (x) -typedef uint8_t wt_timestamp_t[WT_TIMESTAMP_SIZE]; -#define WT_DECL_TIMESTAMP(x) wt_timestamp_t x; +struct __wt_timestamp_t { +#if WT_TIMESTAMP_SIZE == 8 + uint64_t val; #else -#define WT_TIMESTAMP(x) (NULL) + uint8_t ts[WT_TIMESTAMP_SIZE]; +#endif +}; +typedef struct __wt_timestamp_t wt_timestamp_t; +#define WT_DECL_TIMESTAMP(x) wt_timestamp_t x; +#define WT_TIMESTAMP_NULL(x) (x) +#else +typedef void wt_timestamp_t; +#define WT_TIMESTAMP_NULL(x) (NULL) #define WT_DECL_TIMESTAMP(x) #endif -#define WT_GET_TIMESTAMP(x) WT_TIMESTAMP((x)->timestamp) - /* * In diagnostic mode we track the locations from which hazard pointers and * scratch buffers were acquired. diff --git a/src/include/txn.i b/src/include/txn.i index 411141495a6..d693633fabe 100644 --- a/src/include/txn.i +++ b/src/include/txn.i @@ -10,14 +10,15 @@ static inline int __wt_txn_id_check(WT_SESSION_IMPL *session); static inline void __wt_txn_read_last(WT_SESSION_IMPL *session); #ifdef HAVE_TIMESTAMPS +#if WT_TIMESTAMP_SIZE == 8 /* * __wt_timestamp_cmp -- * Compare two timestamps. */ static inline int -__wt_timestamp_cmp(const uint8_t *ts1, const uint8_t *ts2) +__wt_timestamp_cmp(const wt_timestamp_t *ts1, const wt_timestamp_t *ts2) { - return (memcmp(ts1, ts2, WT_TIMESTAMP_SIZE)); + return (ts1->val == ts2->val ? 0 : (ts1->val > ts2->val ? 1 : -1)); } /* @@ -25,9 +26,9 @@ __wt_timestamp_cmp(const uint8_t *ts1, const uint8_t *ts2) * Set a timestamp. */ static inline void -__wt_timestamp_set(uint8_t *dest, const uint8_t *src) +__wt_timestamp_set(wt_timestamp_t *dest, const wt_timestamp_t *src) { - (void)memcpy(dest, src, WT_TIMESTAMP_SIZE); + dest->val = src->val; } /* @@ -35,11 +36,62 @@ __wt_timestamp_set(uint8_t *dest, const uint8_t *src) * Check if a timestamp is equal to the special "zero" time. 
*/ static inline bool -__wt_timestamp_iszero(const uint8_t *ts) +__wt_timestamp_iszero(wt_timestamp_t *ts) +{ + return (ts->val == 0); +} + +/* + * __wt_timestamp_set_inf -- + * Set a timestamp to the maximum value. + */ +static inline void +__wt_timestamp_set_inf(wt_timestamp_t *ts) +{ + ts->val = UINT64_MAX; +} + +/* + * __wt_timestamp_set_zero -- + * Zero out a timestamp. + */ +static inline void +__wt_timestamp_set_zero(wt_timestamp_t *ts) +{ + ts->val = 0; +} +#else +/* + * __wt_timestamp_cmp -- + * Compare two timestamps. + */ +static inline int +__wt_timestamp_cmp(const wt_timestamp_t *ts1, const wt_timestamp_t *ts2) +{ + return (memcmp(ts1->ts, ts2->ts, WT_TIMESTAMP_SIZE)); +} + +/* + * __wt_timestamp_set -- + * Set a timestamp. + */ +static inline void +__wt_timestamp_set(wt_timestamp_t *dest, const wt_timestamp_t *src) +{ + (void)memcpy(dest->ts, src->ts, WT_TIMESTAMP_SIZE); +} + +/* + * __wt_timestamp_iszero -- + * Check if a timestamp is equal to the special "zero" time. + */ +static inline bool +__wt_timestamp_iszero(wt_timestamp_t *ts) { static const wt_timestamp_t zero_timestamp; - return (memcmp(ts, zero_timestamp, WT_TIMESTAMP_SIZE) == 0); + return (memcmp(ts->ts, + WT_TIMESTAMP_NULL(&zero_timestamp), WT_TIMESTAMP_SIZE) == 0); } /* @@ -47,9 +99,9 @@ __wt_timestamp_iszero(const uint8_t *ts) * Set a timestamp to the maximum value. */ static inline void -__wt_timestamp_set_inf(uint8_t *ts) +__wt_timestamp_set_inf(wt_timestamp_t *ts) { - memset(ts, 0xff, WT_TIMESTAMP_SIZE); + memset(ts->ts, 0xff, WT_TIMESTAMP_SIZE); } /* @@ -57,11 +109,12 @@ __wt_timestamp_set_inf(uint8_t *ts) * Zero out a timestamp. 
*/ static inline void -__wt_timestamp_set_zero(uint8_t *ts) +__wt_timestamp_set_zero(wt_timestamp_t *ts) { - memset(ts, 0x00, WT_TIMESTAMP_SIZE); + memset(ts->ts, 0x00, WT_TIMESTAMP_SIZE); } -#endif +#endif /* WT_TIMESTAMP_SIZE == 8 */ +#endif /* HAVE_TIMESTAMPS */ /* * __txn_next_op -- @@ -130,7 +183,7 @@ __wt_txn_modify(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_TXN_OP_INMEM : WT_TXN_OP_BASIC; #ifdef HAVE_TIMESTAMPS if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT)) { - __wt_timestamp_set(upd->timestamp, txn->commit_timestamp); + __wt_timestamp_set(&upd->timestamp, &txn->commit_timestamp); if (!F_ISSET(session, WT_SESSION_LOGGING_INMEM)) op->type = WT_TXN_OP_BASIC_TS; } @@ -238,7 +291,7 @@ __txn_visible_all_id(WT_SESSION_IMPL *session, uint64_t id) */ static inline bool __wt_txn_visible_all( - WT_SESSION_IMPL *session, uint64_t id, const uint8_t *timestamp) + WT_SESSION_IMPL *session, uint64_t id, const wt_timestamp_t *timestamp) { if (!__txn_visible_all_id(session, id)) return (false); @@ -253,7 +306,7 @@ __wt_txn_visible_all( return (true); __wt_readlock(session, &txn_global->rwlock); - cmp = __wt_timestamp_cmp(timestamp, txn_global->pinned_timestamp); + cmp = __wt_timestamp_cmp(timestamp, &txn_global->pinned_timestamp); __wt_readunlock(session, &txn_global->rwlock); /* @@ -279,7 +332,7 @@ static inline bool __wt_txn_upd_visible_all(WT_SESSION_IMPL *session, WT_UPDATE *upd) { return (__wt_txn_visible_all( - session, upd->txnid, WT_GET_TIMESTAMP(upd))); + session, upd->txnid, WT_TIMESTAMP_NULL(&upd->timestamp))); } /* @@ -341,7 +394,7 @@ __txn_visible_id(WT_SESSION_IMPL *session, uint64_t id) */ static inline bool __wt_txn_visible( - WT_SESSION_IMPL *session, uint64_t id, const uint8_t *timestamp) + WT_SESSION_IMPL *session, uint64_t id, const wt_timestamp_t *timestamp) { if (!__txn_visible_id(session, id)) return (false); @@ -354,7 +407,7 @@ __wt_txn_visible( if (!F_ISSET(txn, WT_TXN_HAS_TS_READ) || timestamp == NULL) return (true); - return (memcmp(timestamp, 
txn->read_timestamp, WT_TIMESTAMP_SIZE) <= 0); + return (__wt_timestamp_cmp(timestamp, &txn->read_timestamp) <= 0); } #else WT_UNUSED(timestamp); @@ -369,7 +422,8 @@ __wt_txn_visible( static inline bool __wt_txn_upd_visible(WT_SESSION_IMPL *session, WT_UPDATE *upd) { - return (__wt_txn_visible(session, upd->txnid, WT_GET_TIMESTAMP(upd))); + return (__wt_txn_visible(session, + upd->txnid, WT_TIMESTAMP_NULL(&upd->timestamp))); } /* diff --git a/src/include/verify_build.h b/src/include/verify_build.h index 57189b5c2b2..3973f786a90 100644 --- a/src/include/verify_build.h +++ b/src/include/verify_build.h @@ -52,7 +52,15 @@ __wt_verify_build(void) /* Check specific structures weren't padded. */ WT_SIZE_CHECK(WT_BLOCK_DESC, WT_BLOCK_DESC_SIZE); WT_SIZE_CHECK(WT_REF, WT_REF_SIZE); - WT_SIZE_CHECK(WT_UPDATE, WT_UPDATE_SIZE); + + /* + * WT_UPDATE is special: we arrange fields to avoid padding within the + * structure but it could be padded at the end depending on the + * timestamp size. Further check that the data field in the update + * structure is where we expect it. + */ + WT_SIZE_CHECK(WT_UPDATE, WT_ALIGN(WT_UPDATE_SIZE, 8)); + WT_STATIC_ASSERT(offsetof(WT_UPDATE, data) == WT_UPDATE_SIZE); /* Check specific structures were padded. 
*/ #define WT_PADDING_CHECK(s) \ diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h index 74fdc4c3925..84617dfcab8 100644 --- a/src/include/wt_internal.h +++ b/src/include/wt_internal.h @@ -290,6 +290,8 @@ struct __wt_thread; typedef struct __wt_thread WT_THREAD; struct __wt_thread_group; typedef struct __wt_thread_group WT_THREAD_GROUP; +struct __wt_timestamp_t; + typedef struct __wt_timestamp_t WT_TIMESTAMP_T; struct __wt_txn; typedef struct __wt_txn WT_TXN; struct __wt_txn_global; diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index ae138301357..3f42373eaca 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -687,7 +687,7 @@ __rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r) */ mod->rec_max_txn = r->max_txn; #ifdef HAVE_TIMESTAMPS - __wt_timestamp_set(mod->rec_max_timestamp, r->max_timestamp); + __wt_timestamp_set(&mod->rec_max_timestamp, &r->max_timestamp); #endif /* @@ -703,9 +703,9 @@ __rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r) btree->rec_max_txn = r->max_txn; #ifdef HAVE_TIMESTAMPS if (__wt_timestamp_cmp( - btree->rec_max_timestamp, r->max_timestamp) < 0) - __wt_timestamp_set( - btree->rec_max_timestamp, r->max_timestamp); + &btree->rec_max_timestamp, &r->max_timestamp) < 0) + __wt_timestamp_set(&btree->rec_max_timestamp, + &r->max_timestamp); #endif } @@ -1152,7 +1152,7 @@ __rec_update_save(WT_SESSION_IMPL *session, #ifdef HAVE_TIMESTAMPS if (upd != NULL) __wt_timestamp_set( - r->supd[r->supd_next].onpage_timestamp, upd->timestamp); + &r->supd[r->supd_next].onpage_timestamp, &upd->timestamp); #endif ++r->supd_next; return (0); @@ -1217,8 +1217,8 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, update_mem = 0; max_txn = WT_TXN_NONE; #ifdef HAVE_TIMESTAMPS - __wt_timestamp_set_zero(max_timestamp); - __wt_timestamp_set_inf(min_timestamp); + __wt_timestamp_set_zero(&max_timestamp); + __wt_timestamp_set_inf(&min_timestamp); #endif min_txn = UINT64_MAX; @@ 
-1271,14 +1271,14 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, #ifdef HAVE_TIMESTAMPS /* Track min/max timestamps. */ if (__wt_timestamp_cmp( - max_timestamp, upd->timestamp) < 0) + &max_timestamp, &upd->timestamp) < 0) __wt_timestamp_set( - max_timestamp, upd->timestamp); + &max_timestamp, &upd->timestamp); if (__wt_timestamp_cmp( - min_timestamp, upd->timestamp) > 0) + &min_timestamp, &upd->timestamp) > 0) __wt_timestamp_set( - min_timestamp, upd->timestamp); + &min_timestamp, &upd->timestamp); #endif } } else @@ -1334,8 +1334,8 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, if (WT_TXNID_LT(r->max_txn, max_txn)) r->max_txn = max_txn; #ifdef HAVE_TIMESTAMPS - if (__wt_timestamp_cmp(r->max_timestamp, max_timestamp) < 0) - __wt_timestamp_set(r->max_timestamp, max_timestamp); + if (__wt_timestamp_cmp(&r->max_timestamp, &max_timestamp) < 0) + __wt_timestamp_set(&r->max_timestamp, &max_timestamp); #endif /* @@ -1352,7 +1352,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, */ if (!skipped && (F_ISSET(btree, WT_BTREE_LOOKASIDE) || __wt_txn_visible_all(session, - max_txn, WT_TIMESTAMP(max_timestamp)))) { + max_txn, WT_TIMESTAMP_NULL(&max_timestamp)))) { #ifdef HAVE_DIAGNOSTIC /* * The checkpoint transaction is special. Make sure we never @@ -1448,7 +1448,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, * globally visible, readers require the page's original value. 
*/ if (!__wt_txn_visible_all( - session, min_txn, WT_TIMESTAMP(min_timestamp))) + session, min_txn, WT_TIMESTAMP_NULL(&min_timestamp))) append_origv = true; } @@ -1558,7 +1558,7 @@ __rec_child_deleted(WT_SESSION_IMPL *session, */ if (F_ISSET(r, WT_VISIBILITY_ERR) && page_del != NULL && !__wt_txn_visible(session, - page_del->txnid, WT_GET_TIMESTAMP(page_del))) + page_del->txnid, WT_TIMESTAMP_NULL(&page_del->timestamp))) WT_PANIC_RET(session, EINVAL, "reconciliation illegally skipped an update"); @@ -1588,7 +1588,7 @@ __rec_child_deleted(WT_SESSION_IMPL *session, */ if (ref->addr != NULL && (page_del == NULL || __wt_txn_visible_all( - session, page_del->txnid, WT_GET_TIMESTAMP(page_del)))) + session, page_del->txnid, WT_TIMESTAMP_NULL(&page_del->timestamp)))) WT_RET(__wt_ref_block_free(session, ref)); /* @@ -1639,7 +1639,7 @@ __rec_child_deleted(WT_SESSION_IMPL *session, * address normally. Otherwise, we have to write a proxy record. */ if (__wt_txn_visible( - session, page_del->txnid, WT_GET_TIMESTAMP(page_del))) + session, page_del->txnid, WT_TIMESTAMP_NULL(&page_del->timestamp))) *statep = WT_CHILD_PROXY; return (0); @@ -3777,7 +3777,7 @@ __rec_update_las(WT_SESSION_IMPL *session, continue; #ifdef HAVE_TIMESTAMPS - las_timestamp.data = list->onpage_timestamp; + las_timestamp.data = &list->onpage_timestamp; las_timestamp.size = WT_TIMESTAMP_SIZE; #endif cursor->set_key(cursor, @@ -3787,11 +3787,11 @@ __rec_update_las(WT_SESSION_IMPL *session, if (upd->type == WT_UPDATE_DELETED) las_value.size = 0; else { - las_value.data = WT_UPDATE_DATA(upd); + las_value.data = upd->data; las_value.size = upd->size; } #ifdef HAVE_TIMESTAMPS - las_timestamp.data = upd->timestamp; + las_timestamp.data = &upd->timestamp; las_timestamp.size = WT_TIMESTAMP_SIZE; #endif cursor->set_value(cursor, @@ -4325,7 +4325,7 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) if (upd != NULL) __bit_setv(r->first_free, WT_INSERT_RECNO(ins) - pageref->ref_recno, - 
btree->bitcnt, *(uint8_t *)WT_UPDATE_DATA(upd)); + btree->bitcnt, *upd->data); } /* Calculate the number of entries per page remainder. */ @@ -4382,8 +4382,7 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) if (nrecs > 0) { __bit_setv(r->first_free, entry, btree->bitcnt, - upd == NULL ? 0 : - *(uint8_t *)WT_UPDATE_DATA(upd)); + upd == NULL ? 0 : *upd->data); --nrecs; ++entry; ++r->recno; @@ -4725,7 +4724,7 @@ record_loop: /* deleted = upd->type == WT_UPDATE_DELETED; if (!deleted) { - data = WT_UPDATE_DATA(upd); + data = upd->data; size = upd->size; } } else if (vpack->raw == WT_CELL_VALUE_OVFL_RM) { @@ -4952,7 +4951,7 @@ compare: /* deleted = upd == NULL || upd->type == WT_UPDATE_DELETED; if (!deleted) { - data = WT_UPDATE_DATA(upd); + data = upd->data; size = upd->size; } } @@ -5537,8 +5536,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session, val->cell_len = val->len = val->buf.size = 0; } else { WT_ERR(__rec_cell_build_val(session, r, - WT_UPDATE_DATA(upd), upd->size, - (uint64_t)0)); + upd->data, upd->size, (uint64_t)0)); dictionary = true; } } @@ -5706,7 +5704,7 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) val->len = 0; else WT_RET(__rec_cell_build_val(session, r, - WT_UPDATE_DATA(upd), upd->size, (uint64_t)0)); + upd->data, upd->size, (uint64_t)0)); /* Build key cell. 
*/ WT_RET(__rec_cell_build_leaf_key(session, r, diff --git a/src/txn/txn.c b/src/txn/txn.c index 2169b8a9de2..5ef5cc8cd84 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -444,13 +444,13 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]) wt_timestamp_t oldest_timestamp; WT_RET(__wt_txn_parse_timestamp( - session, "read", txn->read_timestamp, &cval)); + session, "read", &txn->read_timestamp, &cval)); __wt_readlock(session, &txn_global->rwlock); __wt_timestamp_set( - oldest_timestamp, txn_global->oldest_timestamp); + &oldest_timestamp, &txn_global->oldest_timestamp); __wt_readunlock(session, &txn_global->rwlock); if (__wt_timestamp_cmp( - txn->read_timestamp, oldest_timestamp) < 0) + &txn->read_timestamp, &oldest_timestamp) < 0) WT_RET_MSG(session, EINVAL, "read timestamp %.*s older than oldest timestamp", (int)cval.len, cval.str); @@ -590,7 +590,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) if (cval.len != 0) { #ifdef HAVE_TIMESTAMPS WT_ERR(__wt_txn_parse_timestamp( - session, "commit", txn->commit_timestamp, &cval)); + session, "commit", &txn->commit_timestamp, &cval)); __wt_txn_set_commit_timestamp(session); #else WT_ERR_MSG(session, EINVAL, "commit_timestamp requires a " @@ -686,8 +686,8 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) #ifdef HAVE_TIMESTAMPS if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) && op->type != WT_TXN_OP_BASIC_TS) - __wt_timestamp_set(op->u.upd->timestamp, - txn->commit_timestamp); + __wt_timestamp_set(&op->u.upd->timestamp, + &txn->commit_timestamp); #endif break; @@ -695,8 +695,8 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) #ifdef HAVE_TIMESTAMPS if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT)) __wt_timestamp_set( - op->u.ref->page_del->timestamp, - txn->commit_timestamp); + &op->u.ref->page_del->timestamp, + &txn->commit_timestamp); #endif break; @@ -728,10 +728,10 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) if (update_timestamp) { __wt_readlock(session, 
&txn_global->rwlock); __wt_timestamp_set( - prev_commit_timestamp, txn_global->commit_timestamp); + &prev_commit_timestamp, &txn_global->commit_timestamp); __wt_readunlock(session, &txn_global->rwlock); update_timestamp = __wt_timestamp_cmp( - txn->commit_timestamp, prev_commit_timestamp) > 0; + &txn->commit_timestamp, &prev_commit_timestamp) > 0; } /* @@ -740,10 +740,10 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) */ if (update_timestamp) { __wt_writelock(session, &txn_global->rwlock); - if (__wt_timestamp_cmp(txn->commit_timestamp, - txn_global->commit_timestamp) > 0) { - __wt_timestamp_set(txn_global->commit_timestamp, - txn->commit_timestamp); + if (__wt_timestamp_cmp(&txn->commit_timestamp, + &txn_global->commit_timestamp) > 0) { + __wt_timestamp_set(&txn_global->commit_timestamp, + &txn->commit_timestamp); txn_global->has_commit_timestamp = true; } __wt_writeunlock(session, &txn_global->rwlock); @@ -1008,7 +1008,7 @@ __wt_txn_global_shutdown(WT_SESSION_IMPL *session) * Now that all transactions have completed, no timestamps should be * pinned. */ - memset(txn_global->pinned_timestamp, 0xff, WT_TIMESTAMP_SIZE); + __wt_timestamp_set_inf(&txn_global->pinned_timestamp); #endif return (ret); diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index be72c72714e..d21c68f6681 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -1682,7 +1682,7 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) WT_RET(__wt_txn_update_oldest( session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT)); return (__wt_txn_visible_all(session, btree->rec_max_txn, - WT_TIMESTAMP(btree->rec_max_timestamp)) ? + WT_TIMESTAMP_NULL(&btree->rec_max_timestamp)) ? 
__wt_cache_op(session, WT_SYNC_DISCARD) : EBUSY); } diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c index d291139284a..abe4c085e50 100644 --- a/src/txn/txn_log.c +++ b/src/txn/txn_log.c @@ -74,7 +74,7 @@ __txn_op_log(WT_SESSION_IMPL *session, cursor = &cbt->iface; upd = op->u.upd; - value.data = WT_UPDATE_DATA(upd); + value.data = upd->data; value.size = upd->size; /* diff --git a/src/txn/txn_timestamp.c b/src/txn/txn_timestamp.c index 9a2eb233227..fbbec33c325 100644 --- a/src/txn/txn_timestamp.c +++ b/src/txn/txn_timestamp.c @@ -15,15 +15,8 @@ */ int __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, - const char *name, uint8_t *timestamp, WT_CONFIG_ITEM *cval) + const char *name, wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval) { - WT_DECL_RET; - WT_ITEM ts; - wt_timestamp_t tsbuf; - size_t hexlen; - const char *hexts; - char padbuf[2 * WT_TIMESTAMP_SIZE + 1]; - __wt_timestamp_set_zero(timestamp); if (cval->len == 0) @@ -35,6 +28,40 @@ __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, "Failed to parse %s timestamp '%.*s': too long", name, (int)cval->len, cval->str); +#if WT_TIMESTAMP_SIZE == 8 + { + static const u_char hextable[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 0, 0, 0, 0, 0, 0, + 0, 10, 11, 12, 13, 14, 15, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 10, 11, 12, 13, 14, 15 + }; + wt_timestamp_t ts; + size_t len; + const char *hex; + + for (ts.val = 0, hex = cval->str, len = cval->len; len > 0; --len) + ts.val = (ts.val << 4) | hextable[(int)*hex++]; + __wt_timestamp_set(timestamp, &ts); + } +#else + { + WT_DECL_RET; + WT_ITEM ts; + wt_timestamp_t tsbuf; + size_t hexlen; + const char *hexts; + char padbuf[2 * WT_TIMESTAMP_SIZE + 1]; + /* * The decoding function assumes it is decoding data produced by dump * and so requires an even number of hex digits. 
@@ -50,8 +77,8 @@ __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, } /* Avoid memory allocation to decode timestamps. */ - ts.data = ts.mem = tsbuf; - ts.memsize = sizeof(tsbuf); + ts.data = ts.mem = tsbuf.ts; + ts.memsize = sizeof(tsbuf.ts); if ((ret = __wt_nhex_to_raw(session, hexts, hexlen, &ts)) != 0) WT_RET_MSG(session, ret, "Failed to parse %s timestamp '%.*s'", @@ -59,15 +86,16 @@ __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, WT_ASSERT(session, ts.size <= WT_TIMESTAMP_SIZE); /* Copy the raw value to the end of the timestamp. */ - memcpy(timestamp + WT_TIMESTAMP_SIZE - ts.size, + memcpy(timestamp->ts + WT_TIMESTAMP_SIZE - ts.size, ts.data, ts.size); - + } +#endif if (__wt_timestamp_iszero(timestamp)) WT_RET_MSG(session, EINVAL, "Failed to parse %s timestamp '%.*s': zero not permitted", name, (int)cval->len, cval->str); - return (ret); + return (0); } /* @@ -76,12 +104,13 @@ __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, */ static int __txn_global_query_timestamp( - WT_SESSION_IMPL *session, uint8_t *ts, const char *cfg[]) + WT_SESSION_IMPL *session, wt_timestamp_t *tsp, const char *cfg[]) { WT_CONNECTION_IMPL *conn; WT_CONFIG_ITEM cval; WT_TXN *txn; WT_TXN_GLOBAL *txn_global; + wt_timestamp_t ts; conn = S2C(session); txn_global = &conn->txn_global; @@ -91,41 +120,42 @@ __txn_global_query_timestamp( if (!txn_global->has_commit_timestamp) return (WT_NOTFOUND); __wt_readlock(session, &txn_global->rwlock); - __wt_timestamp_set(ts, txn_global->commit_timestamp); + __wt_timestamp_set(&ts, &txn_global->commit_timestamp); __wt_readunlock(session, &txn_global->rwlock); /* Compare with the oldest running transaction. 
*/ __wt_readlock(session, &txn_global->commit_timestamp_rwlock); txn = TAILQ_FIRST(&txn_global->commit_timestamph); if (txn != NULL && - __wt_timestamp_cmp(txn->commit_timestamp, ts) < 0) - __wt_timestamp_set(ts, txn->commit_timestamp); + __wt_timestamp_cmp(&txn->commit_timestamp, &ts) < 0) + __wt_timestamp_set(&ts, &txn->commit_timestamp); __wt_readunlock(session, &txn_global->commit_timestamp_rwlock); } else if (WT_STRING_MATCH("oldest_reader", cval.str, cval.len)) { if (!txn_global->has_oldest_timestamp) return (WT_NOTFOUND); __wt_readlock(session, &txn_global->rwlock); - __wt_timestamp_set(ts, txn_global->oldest_timestamp); + __wt_timestamp_set(&ts, &txn_global->oldest_timestamp); /* Check for a running checkpoint */ txn = txn_global->checkpoint_txn; if (txn_global->checkpoint_state.pinned_id != WT_TXN_NONE && - !__wt_timestamp_iszero(txn->read_timestamp) && - __wt_timestamp_cmp(txn->read_timestamp, ts) < 0) - __wt_timestamp_set(ts, txn->read_timestamp); + !__wt_timestamp_iszero(&txn->read_timestamp) && + __wt_timestamp_cmp(&txn->read_timestamp, &ts) < 0) + __wt_timestamp_set(&ts, &txn->read_timestamp); __wt_readunlock(session, &txn_global->rwlock); /* Look for the oldest ordinary reader. 
*/ __wt_readlock(session, &txn_global->read_timestamp_rwlock); txn = TAILQ_FIRST(&txn_global->read_timestamph); if (txn != NULL && - __wt_timestamp_cmp(txn->read_timestamp, ts) < 0) - __wt_timestamp_set(ts, txn->read_timestamp); + __wt_timestamp_cmp(&txn->read_timestamp, &ts) < 0) + __wt_timestamp_set(&ts, &txn->read_timestamp); __wt_readunlock(session, &txn_global->read_timestamp_rwlock); } else WT_RET_MSG(session, EINVAL, "unknown timestamp query %.*s", (int)cval.len, cval.str); + __wt_timestamp_set(tsp, &ts); return (0); } #endif @@ -139,8 +169,28 @@ __wt_txn_global_query_timestamp( WT_SESSION_IMPL *session, char *hex_timestamp, const char *cfg[]) { #ifdef HAVE_TIMESTAMPS - WT_ITEM hexts; wt_timestamp_t ts; + + WT_RET(__txn_global_query_timestamp(session, &ts, cfg)); + +#if WT_TIMESTAMP_SIZE == 8 + { + char *p, v; + + for (p = hex_timestamp; ts.val != 0; ts.val >>= 4) + *p++ = (char)__wt_hex((u_char)(ts.val & 0x0f)); + *p = '\0'; + + /* Reverse the string. */ + for (--p; p > hex_timestamp;) { + v = *p; + *p-- = *hex_timestamp; + *hex_timestamp++ = v; + } + } +#else + { + WT_ITEM hexts; size_t len; uint8_t *tsp; @@ -148,18 +198,20 @@ __wt_txn_global_query_timestamp( * Keep clang-analyzer happy: it can't tell that ts will be set * whenever the call below succeeds. */ - __wt_timestamp_set_zero(ts); - WT_RET(__txn_global_query_timestamp(session, ts, cfg)); + __wt_timestamp_set_zero(&ts); + WT_RET(__txn_global_query_timestamp(session, &ts, cfg)); /* Avoid memory allocation: set up an item guaranteed large enough. */ hexts.data = hexts.mem = hex_timestamp; hexts.memsize = 2 * WT_TIMESTAMP_SIZE + 1; /* Trim leading zeros. 
*/ - for (tsp = ts, len = WT_TIMESTAMP_SIZE; + for (tsp = ts.ts, len = WT_TIMESTAMP_SIZE; len > 0 && *tsp == 0; ++tsp, --len) ; WT_RET(__wt_raw_to_hex(session, tsp, len, &hexts)); + } +#endif return (0); #else WT_UNUSED(hex_timestamp); @@ -193,28 +245,28 @@ __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session) return (0); __wt_readlock(session, &txn_global->rwlock); - __wt_timestamp_set(oldest_timestamp, txn_global->oldest_timestamp); + __wt_timestamp_set(&oldest_timestamp, &txn_global->oldest_timestamp); __wt_readunlock(session, &txn_global->rwlock); /* Scan to find the global pinned timestamp. */ if ((ret = __txn_global_query_timestamp( - session, active_timestamp, query_cfg)) != 0) + session, &active_timestamp, query_cfg)) != 0) return (ret == WT_NOTFOUND ? 0 : ret); - if (__wt_timestamp_cmp(oldest_timestamp, active_timestamp) < 0) { - __wt_timestamp_set(pinned_timestamp, oldest_timestamp); + if (__wt_timestamp_cmp(&oldest_timestamp, &active_timestamp) < 0) { + __wt_timestamp_set(&pinned_timestamp, &oldest_timestamp); } else - __wt_timestamp_set(pinned_timestamp, active_timestamp); + __wt_timestamp_set(&pinned_timestamp, &active_timestamp); __wt_writelock(session, &txn_global->rwlock); if (!txn_global->has_pinned_timestamp || __wt_timestamp_cmp( - txn_global->pinned_timestamp, pinned_timestamp) < 0) { + &txn_global->pinned_timestamp, &pinned_timestamp) < 0) { __wt_timestamp_set( - txn_global->pinned_timestamp, pinned_timestamp); + &txn_global->pinned_timestamp, &pinned_timestamp); txn_global->has_pinned_timestamp = true; txn_global->oldest_is_pinned = __wt_timestamp_cmp( - txn_global->pinned_timestamp, - txn_global->oldest_timestamp) == 0; + &txn_global->pinned_timestamp, + &txn_global->oldest_timestamp) == 0; } __wt_writeunlock(session, &txn_global->rwlock); @@ -242,7 +294,7 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) wt_timestamp_t oldest_timestamp; WT_RET(__wt_txn_parse_timestamp( - session, "oldest", oldest_timestamp, 
&cval)); + session, "oldest", &oldest_timestamp, &cval)); /* * This method can be called from multiple threads, check that @@ -251,9 +303,9 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) txn_global = &S2C(session)->txn_global; __wt_writelock(session, &txn_global->rwlock); if (!txn_global->has_oldest_timestamp || __wt_timestamp_cmp( - txn_global->oldest_timestamp, oldest_timestamp) < 0) { + &txn_global->oldest_timestamp, &oldest_timestamp) < 0) { __wt_timestamp_set( - txn_global->oldest_timestamp, oldest_timestamp); + &txn_global->oldest_timestamp, &oldest_timestamp); txn_global->has_oldest_timestamp = true; txn_global->oldest_is_pinned = false; } @@ -288,7 +340,7 @@ __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_TXN *txn = &session->txn; WT_RET(__wt_txn_parse_timestamp( - session, "commit", txn->commit_timestamp, &cval)); + session, "commit", &txn->commit_timestamp, &cval)); __wt_txn_set_commit_timestamp(session); #else WT_RET_MSG(session, EINVAL, "commit_timestamp requires a " @@ -320,7 +372,7 @@ __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session) __wt_writelock(session, &txn_global->commit_timestamp_rwlock); for (prev = TAILQ_LAST(&txn_global->commit_timestamph, __wt_txn_cts_qh); prev != NULL && __wt_timestamp_cmp( - prev->commit_timestamp, txn->commit_timestamp) > 0; + &prev->commit_timestamp, &txn->commit_timestamp) > 0; prev = TAILQ_PREV(prev, __wt_txn_cts_qh, commit_timestampq)) ; if (prev == NULL) @@ -374,7 +426,7 @@ __wt_txn_set_read_timestamp(WT_SESSION_IMPL *session) __wt_writelock(session, &txn_global->read_timestamp_rwlock); for (prev = TAILQ_LAST(&txn_global->read_timestamph, __wt_txn_rts_qh); prev != NULL && __wt_timestamp_cmp( - prev->read_timestamp, txn->read_timestamp) > 0; + &prev->read_timestamp, &txn->read_timestamp) > 0; prev = TAILQ_PREV(prev, __wt_txn_rts_qh, read_timestampq)) ; if (prev == NULL) |