diff options
author | Luke Chen <luke.chen@mongodb.com> | 2021-12-30 14:13:27 +1100 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-12-30 03:47:13 +0000 |
commit | d84b3fd558566046a94a32b5315566848de18062 (patch) | |
tree | d12df6f63af7d7f7f396ecba3e45f4d0dc25315f | |
parent | 9175c0ded9d2714b9b126fa9c036a0e8015e7796 (diff) | |
download | mongo-d84b3fd558566046a94a32b5315566848de18062.tar.gz |
Import wiredtiger: eaf625d02f3cc4d1381eb1ca6d6fcd90ff859aac from branch mongodb-5.2
ref: 91d0ca7039..eaf625d02f
for: 5.2.0-rc4
WT-8523 Create script that can output MongoDB tables into BSON format
-rw-r--r-- | src/third_party/wiredtiger/import.data | 2 | ||||
-rwxr-xr-x | src/third_party/wiredtiger/tools/wt_to_mdb_bson.py | 76 |
2 files changed, 77 insertions, 1 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 6b96758a20d..38bf67a458a 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-5.2", - "commit": "91d0ca7039b960e8de6241925b43eca47c3ef813" + "commit": "eaf625d02f3cc4d1381eb1ca6d6fcd90ff859aac" } diff --git a/src/third_party/wiredtiger/tools/wt_to_mdb_bson.py b/src/third_party/wiredtiger/tools/wt_to_mdb_bson.py new file mode 100755 index 00000000000..353ee8c672d --- /dev/null +++ b/src/third_party/wiredtiger/tools/wt_to_mdb_bson.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python +# +# Public Domain 2014-present MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

# Decodes a MongoDB file into a readable format.
#
# The file is dumped in hex format with "<path_to_wt> dump -x file:<filename>";
# every value found after the "Data" line of that dump is then decoded from
# hex into a BSON document and pretty-printed together with its key.

import codecs
import pprint
import subprocess
import sys

import bson

# Print the command line usage of this tool.
def util_usage():
    print("Usage: wt_to_mdb_bson <path_to_wt> filename")

# Navigate to the data section of the MongoDB file if it exists.
#
# mdb_file_contents: the lines of the dump.
# Returns the index of the line following the first line that reads 'Data'
# (ignoring surrounding whitespace), or -1 if there is no such line.
def find_data_section(mdb_file_contents):
    for i, line in enumerate(mdb_file_contents):
        if line.strip() == 'Data':
            return i + 1

    # No data section was found, return an invalid index.
    return -1

# Decode the keys and values from hex format to a readable BSON format.
#
# mdb_file_contents: the lines of the dump.
# data_index: index of the first key line, as returned by find_data_section
#     (must not be negative).
# Prints every key as found in the dump, followed by its decoded value.
# Raises ValueError if the data section ends with a key that has no value;
# all the complete pairs before it are still printed.
def decode_data_section(mdb_file_contents, data_index):
    data_lines = mdb_file_contents[data_index:]

    # The data section alternates key and value lines, walk it pair by pair.
    for key_line, value_line in zip(data_lines[::2], data_lines[1::2]):
        key = key_line.strip()
        value = value_line.strip()

        byt = codecs.decode(value, 'hex')
        obj = bson.decode_all(byt)[0]

        print('Key:\t%s' % key)
        print('Value:\n\t%s' % (pprint.pformat(obj, indent=1).replace('\n', '\n\t'),))

    # An odd number of lines means the dump is truncated or malformed. Report
    # it explicitly rather than failing with an out of range index.
    if len(data_lines) % 2 != 0:
        raise ValueError('Key without a value at the end of the data section: %s' %
            data_lines[-1].strip())

# Dump a MongoDB file with the wt utility and print its decoded content.
#
# wtpath: path to the wt executable.
# filename: name of the file to dump, passed to wt as "file:<filename>".
# Exits with a non-zero status and a message on stderr if the dump has no
# data section. Errors raised while running wt (for instance
# subprocess.CalledProcessError when wt fails) are not caught here.
def dump_mdb_file(wtpath, filename):
    # Dump the MongoDB file into hex format.
    mdb_hex = subprocess.check_output([wtpath, "dump", "-x", "file:" + filename],
        universal_newlines=True)

    mdb_file_contents = mdb_hex.splitlines()
    data_index = find_data_section(mdb_file_contents)
    if data_index < 0:
        # sys.exit with a string prints it on stderr and exits with status 1.
        sys.exit("Error: No data section was found in the file.")

    decode_data_section(mdb_file_contents, data_index)

# Entry point of the tool.
#
# argv: the full argument vector, program name included.
# Returns the exit status of the process: 0 on success, 1 on a usage error.
def main(argv):
    if len(argv) != 3:
        util_usage()
        return 1

    wtpath = argv[1]
    filename = argv[2]
    dump_mdb_file(wtpath, filename)
    return 0

# Only run the tool when executed as a script, not when imported.
if __name__ == '__main__':
    sys.exit(main(sys.argv))