diff options
author | Daniel Gottlieb <daniel.gottlieb@mongodb.com> | 2022-08-05 14:49:44 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-05 19:20:56 +0000 |
commit | ea949cefad9d815009a959c0a17fd023db3b4598 (patch) | |
tree | d977a06c083d7ffe1de62fd36a43dbed3c35b119 /buildscripts | |
parent | bb48bf70f1dc9bcd76ca42df992f9d8da048ea91 (diff) | |
download | mongo-ea949cefad9d815009a959c0a17fd023db3b4598.tar.gz |
SERVER-68455: GDB methods for dumping in-memory WT trees.
Diffstat (limited to 'buildscripts')
-rw-r--r-- | buildscripts/gdb/mongo_printers.py | 12 | ||||
-rw-r--r-- | buildscripts/gdb/wt_dump_table.py | 215 |
2 files changed, 226 insertions, 1 deletions
diff --git a/buildscripts/gdb/mongo_printers.py b/buildscripts/gdb/mongo_printers.py index a0dcc6ad70c..c2cb0778fbd 100644 --- a/buildscripts/gdb/mongo_printers.py +++ b/buildscripts/gdb/mongo_printers.py @@ -679,8 +679,18 @@ class WtUpdateToBsonPrinter(object): def children(self): """children.""" + if self.val['type'] != 3: + # Type 3 is a "normal" update. Notably type 4 is a deletion and type 1 represents a + # delta relative to the previous committed version in the update chain. Only attempt + # to parse type 3 as bson. + return + memory = gdb.selected_inferior().read_memory(self.ptr, self.size).tobytes() - bsonobj = next(bson.decode_iter(memory)) # pylint: disable=stop-iteration-return + bsonobj = None + try: + bsonobj = next(bson.decode_iter(memory)) # pylint: disable=stop-iteration-return + except bson.errors.InvalidBSON: + return for key, value in list(bsonobj.items()): yield 'key', key diff --git a/buildscripts/gdb/wt_dump_table.py b/buildscripts/gdb/wt_dump_table.py new file mode 100644 index 00000000000..75c4a92bb7f --- /dev/null +++ b/buildscripts/gdb/wt_dump_table.py @@ -0,0 +1,215 @@ +import gdb +import bson +from pprint import pprint + +DEBUGGING = False +''' +Public API to be called by users. The input `ident` is a string of the form: + 'collection-2--4547167393143767234'. + From within gdb type: + python dump_pages_for_table('collection-2--4547167393143767234') + +Some behaviors/limitations: +* Disk images of data are not deserialized into their separate key/value pairs. +* If update chain WT_UPDATEs are valid bson, the values will be parsed and output as BSON maps. +* If updates are not bson (e.g: index entries), they will be output as a raw byte array. +* WT_UPDATE structures have a pretty printer registered. Disabling pretty printers will result in + more raw output. +* Any `file:*.wt` can be output, e.g: `_mdb_catalog` or `WiredTiger`. Though the output may be less + supported/of lower quality. 
+''' + + +def dump_pages_for_table(ident): + conn_impl_type = gdb.lookup_type("WT_CONNECTION_IMPL") + if not conn_impl_type: + print('WT_CONNECTION_IMPL type not found. Try invoking this function from a different \ +thread and frame.') + return + + conn_impl_ptr_type = conn_impl_type.pointer() + dbg('impl', conn_impl_ptr_type) + + conn_ptr = None + try: + conn_ptr = gdb.parse_and_eval("session->iface->connection") + except gdb.error: + pass + + if not conn_ptr or not conn_ptr.address: + print( + 'Failed to find a suitable `WT_SESSION session` object to extract a connection object \ +from. Try finding an eviction thread and frame, e.g: `__wt_evict_thread_run`. If the session is \ +optimized out, try going up stack frames until the variable is in a local scope rather than a \ +function input.') + return + + conn = conn_ptr.reinterpret_cast(conn_impl_ptr_type).dereference() + dbg('conn', conn) + data_handle, all_dhs = get_data_handle(conn, 'file:{}.wt'.format(ident)) + if not data_handle: + print('Data handle not found for ident. Ident: `{}`'.format(ident)) + print('All known data handles:') + pprint(all_dhs) + return + + dump_handle(data_handle) + + +# Private API. 
def dbg(ident, var):
    """Print diagnostic details about `var`, labelled with `ident`.

    Does nothing unless the module-level DEBUGGING flag is true. For a gdb.Value the label is
    printed with the value's type and address, and its fields are appended at the end. For any
    other object only the label is printed. In both cases the Python type and the non-dunder
    attribute names of `var` are listed.
    """
    if not DEBUGGING:
        return

    is_gdb_value = isinstance(var, gdb.Value)

    print('----------')
    if is_gdb_value:
        print('{}: ({}*){}'.format(ident, var.type, var.address))
    else:
        print(ident)
    print(' ' + str(type(var)))
    for name in dir(var):
        if not name.startswith("__"):
            print(' ' + name)

    if is_gdb_value:
        print('\n Fields:')
        print('\t' + '\n\t'.join(str(var).split('\n')))


def walk_wt_list(lst):
    """Return the dereferenced elements of a linked list as a Python list.

    Args:
        lst: gdb.Value of the list head; its `tqh_first` field points at the first node and each
            node links to its successor through `q.tqe_next`.

    Returns:
        A list of gdb.Value structs, one per node, in list order.
    """
    ret = []
    node = lst['tqh_first']
    dbg('node', node)
    # A null pointer gdb.Value is falsy, which terminates the walk.
    while node:
        ret.append(node.dereference())
        node = node['q']['tqe_next']

    return ret


def get_data_handle(conn, handle_name):
    """Find the data handle named `handle_name` in the connection's handle list.

    Args:
        conn: gdb.Value of the connection struct; its `dhqh` field is the data handle list.
        handle_name: Full handle name to match, e.g. 'file:<ident>.wt'.

    Returns:
        A `(handle, all_file_idents)` tuple. `handle` is the matching gdb.Value (the last one if
        several match) or None. `all_file_idents` lists every handle whose name starts with
        'file:', with the first five and last three characters removed.
    """
    dbg('datahandles', conn['dhqh'])
    ret = None
    all_file_dhs = []
    for handle in walk_wt_list(conn['dhqh']):
        # Read the name from the inferior once per handle.
        name = handle['name'].string()
        if name.startswith('file:'):
            # Drops the 'file:' prefix and assumes a three character '.wt' suffix.
            all_file_dhs.append(name[5:-3])
        if name == handle_name:
            ret = handle

    return ret, all_file_dhs


def get_btree_handle(dhandle):
    """Return the `handle` field of `dhandle` reinterpreted as a dereferenced WT_BTREE."""
    btree = gdb.lookup_type('WT_BTREE').pointer()
    return dhandle['handle'].reinterpret_cast(btree).dereference()


def dump_update_chain(update_chain):
    """Print every update in a chain, following `next` pointers until a null pointer.

    Each update struct is printed (through whatever pretty printer is registered) followed by
    its value decoded as BSON, or `None` when the value was not or could not be decoded.

    Args:
        update_chain: gdb.Value pointer to the first update of the chain; may be null.
    """
    while update_chain:
        dbg('update', update_chain)
        wt_val = update_chain.dereference()
        dbg('wt_val', wt_val)

        obj = None
        # Type 3 is a "normal" update; other types (e.g. deletions and deltas) do not carry a
        # complete value, so their bytes are neither read nor parsed.
        if wt_val['type'] == 3:
            val_bytes = gdb.selected_inferior().read_memory(wt_val['data'], wt_val['size'])
            try:
                obj = bson.decode_all(val_bytes)[0]
            except Exception:  # pylint: disable=broad-except
                # Best effort: values that are not valid BSON (e.g. index entries) print as
                # `None`. Unlike a bare `except`, this lets KeyboardInterrupt through so a long
                # dump can still be cancelled.
                pass
        print(' ' + '\n '.join(str(wt_val).split('\n')) + " " + str(obj) + " =>")

        update_chain = update_chain['next']

    print(' λ (End of update chain)')


def dump_insert_list(wt_insert):
    """Print the key of one insert entry followed by its update chain.

    Args:
        wt_insert: gdb.Value of a dereferenced insert struct. The key bytes live at
            `u.key.offset` bytes past the struct's own address and are `u.key.size` long.
    """
    key_struct = wt_insert['u']['key']
    key_address = int(wt_insert.address) + int(key_struct['offset'])
    key = gdb.selected_inferior().read_memory(key_address, int(key_struct['size'])).tobytes()
    print('Key: ' + str(key))
    print('Value:')
    dump_update_chain(wt_insert['upd'])


def dump_skip_list(wt_insert_head):
    """Print every entry reachable along the lowest level of a skip list.

    Args:
        wt_insert_head: gdb.Value of a dereferenced insert-head struct. `head[0]` is followed
            through each entry's `next[0]` pointer until a null pointer is reached.
    """
    if not wt_insert_head['head'].address:
        # The head struct itself sits at address zero, i.e. the caller dereferenced null.
        return

    wt_insert = wt_insert_head['head'][0]
    idx = 0
    while wt_insert:
        dump_insert_list(wt_insert.dereference())
        dbg('insert' + str(idx), wt_insert.dereference())
        idx += 1
        wt_insert = wt_insert['next'][0]


def dump_modified(leaf_page):
    """Print the in-memory modifications (insert list and update list) of a leaf page.

    Args:
        leaf_page: gdb.Value of a dereferenced page struct. `modify` may be null, in which case
            only a "No modifies" notice is printed.
    """
    print("Modify:")
    if not leaf_page['modify']:
        print("No modifies")
        return

    leaf_modify = leaf_page['modify'].dereference()
    dbg('modify', leaf_modify)
    row_leaf_insert = leaf_modify['u2']['row_leaf']['insert']
    dbg('row store', row_leaf_insert)
    if not row_leaf_insert:
        print("No insert list")
    else:
        print("Insert list:")
        # NOTE(review): only the first element of the insert array is dumped here.
        dump_skip_list(row_leaf_insert.dereference().dereference())

    row_leaf_update = leaf_modify['u2']['row_leaf']['update']
    if not row_leaf_update:
        print("No update list")
    else:
        print("Update list:")
        # One update chain per page entry.
        for i in range(int(leaf_page['entries'])):
            dump_update_chain(row_leaf_update[i])


def dump_disk(leaf_page):
    """Print the raw bytes of a page's disk image, excluding its headers.

    Args:
        leaf_page: gdb.Value of a dereferenced page struct. `dsk` may be null, in which case
            only a notice is printed.
    """
    dbg('in-memory page:', leaf_page)
    dsk = leaf_page['dsk'].dereference()
    if int(dsk.address) == 0:
        print("No page loaded from disk.")
        return
    dbg('on-disk page:', dsk)

    wt_page_header_size = 28
    wt_block_header_size = 12
    headers_size = wt_page_header_size + wt_block_header_size
    # `mem_size` is the size of the whole image measured from its start, headers included.
    # Reading that many bytes from *after* the headers would run past the end of the image, so
    # the header bytes are subtracted from the length.
    payload_size = max(int(dsk['mem_size']) - headers_size, 0)
    page_bytes = b''
    if payload_size:
        page_bytes = gdb.selected_inferior().read_memory(
            int(dsk.address) + headers_size, payload_size).tobytes()
    print("Dsk:\n" + str(page_bytes))


def dump_handle(dhandle):
    """Dump the disk image and modifications of every page directly below the root.

    NOTE(review): each child of the root is treated as a leaf page; trees with additional
    internal levels are not descended into.

    Args:
        dhandle: gdb.Value of the data handle struct to dump.
    """
    print("Dumping: " + dhandle['name'].string())
    btree = get_btree_handle(dhandle)
    root = btree['root']
    if not root['page']:
        # Without a root page there is nothing to walk, and dereferencing null would fail.
        print("Root page is not in memory.")
        return

    root_page = root['page'].dereference()
    dbg('btree', btree)
    dbg('root', root)
    dbg('root page', root_page)
    rpindex = root_page['u']['intl']['__index'].dereference()
    num_children = int(rpindex['entries'])
    for idx in range(num_children):
        dbg('rpindex', rpindex)
        dbg('rp-pre-index', rpindex['index'].dereference().dereference())
        child_page_ptr = rpindex['index'][idx].dereference()['page']
        if not child_page_ptr:
            # Children that are not resident have no page struct to inspect.
            print("Child page {} is not in memory; skipping.".format(idx))
            continue

        leaf_page = child_page_ptr.dereference()
        dbg('leaf', leaf_page)
        dump_disk(leaf_page)
        dump_modified(leaf_page)