summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2021-12-30 14:13:27 +1100
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-12-30 03:47:13 +0000
commitd84b3fd558566046a94a32b5315566848de18062 (patch)
treed12df6f63af7d7f7f396ecba3e45f4d0dc25315f
parent9175c0ded9d2714b9b126fa9c036a0e8015e7796 (diff)
downloadmongo-d84b3fd558566046a94a32b5315566848de18062.tar.gz
Import wiredtiger: eaf625d02f3cc4d1381eb1ca6d6fcd90ff859aac from branch mongodb-5.2
ref: 91d0ca7039..eaf625d02f for: 5.2.0-rc4 WT-8523 Create script that can output MongoDB tables into BSON format
-rw-r--r--src/third_party/wiredtiger/import.data2
-rwxr-xr-xsrc/third_party/wiredtiger/tools/wt_to_mdb_bson.py76
2 files changed, 77 insertions, 1 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 6b96758a20d..38bf67a458a 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-5.2",
- "commit": "91d0ca7039b960e8de6241925b43eca47c3ef813"
+ "commit": "eaf625d02f3cc4d1381eb1ca6d6fcd90ff859aac"
}
diff --git a/src/third_party/wiredtiger/tools/wt_to_mdb_bson.py b/src/third_party/wiredtiger/tools/wt_to_mdb_bson.py
new file mode 100755
index 00000000000..353ee8c672d
--- /dev/null
+++ b/src/third_party/wiredtiger/tools/wt_to_mdb_bson.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import bson, codecs, pprint, subprocess, sys
+
+# Decodes a MongoDB file into a readable format.
def util_usage():
    """Print the command-line usage summary for this tool to standard output."""
    usage_text = "Usage: wt_to_mdb_bson <path_to_wt> filename"
    print(usage_text)
+
+# Navigate to the data section of the MongoDB file if it exists.
def find_data_section(mdb_file_contents):
    """Locate the start of the data section in the dumped file contents.

    mdb_file_contents is the dump output as a sequence of text lines. The
    data section starts on the line after the first line that, once
    surrounding whitespace is stripped, is exactly 'Data'.

    Returns the index of the line following that marker. This may equal
    len(mdb_file_contents) when the marker is the last line. Returns -1
    when no marker line exists.
    """
    for index, line in enumerate(mdb_file_contents):
        if line.strip() == 'Data':
            return index + 1

    # No data section was found, return an invalid index.
    return -1
+
+# Decode the keys and values from hex format to a readable BSON format.
def decode_data_section(mdb_file_contents, data_index):
    """Print every key/value pair of the data section in readable form.

    mdb_file_contents is the dump output as a sequence of text lines and
    data_index is the index of the first line of the data section. Lines
    are consumed two at a time: a key line followed by a value line.

    The key is printed as it appears in the dump. The value is converted
    from hex to bytes, decoded with bson.decode_all, and its first decoded
    document is pretty-printed with a tab indent.

    If the section has an odd number of lines, the final key has no value.
    That key is reported on standard error and decoding stops, instead of
    failing with an IndexError. Errors raised while decoding a value (bad
    hex, invalid BSON) are not caught here.
    """
    line_count = len(mdb_file_contents)

    # Loop through the data section and increment by 2, since we parse the K/V pairs.
    for i in range(data_index, line_count, 2):
        key = mdb_file_contents[i].strip()

        if i + 1 >= line_count:
            # A key without a value line: nothing left to decode.
            sys.stderr.write(
                "Error: key '%s' has no matching value, the dump output may be truncated.\n" % key)
            break

        value = mdb_file_contents[i + 1].strip()

        byt = codecs.decode(value, 'hex')
        obj = bson.decode_all(byt)[0]

        print('Key:\t%s' % key)
        print('Value:\n\t%s' % (pprint.pformat(obj, indent=1).replace('\n', '\n\t'),))
+
def dump_mdb_file(wtpath, filename):
    """Dump a MongoDB file with the wt utility and print its decoded contents.

    wtpath is the path of the wt executable and filename is the name of the
    file to dump; it is passed to wt as the URI "file:" + filename.

    The hex dump is split into lines, the data section is located with
    find_data_section, and its key/value pairs are printed by
    decode_data_section. When no data section exists, an error is written
    to standard error and the process exits with status 1, so calling
    scripts can detect the failure.

    Raises subprocess.CalledProcessError when wt exits with a non-zero
    status.
    """
    # Dump the MongoDB file into hex format.
    mdb_hex = subprocess.check_output(
        [wtpath, "dump", "-x", "file:" + filename], universal_newlines=True)

    mdb_file_contents = mdb_hex.splitlines()
    data_index = find_data_section(mdb_file_contents)
    if data_index <= 0:
        # Use sys.exit, not the site-provided exit(), and a failing status.
        sys.stderr.write("Error: No data section was found in the file.\n")
        sys.exit(1)

    decode_data_section(mdb_file_contents, data_index)
+
# Script entry point. The guard keeps the tool from running (and possibly
# exiting the interpreter) when this file is imported rather than executed.
if __name__ == "__main__":
    # Exactly two arguments are required: the wt executable and the file name.
    if len(sys.argv) != 3:
        util_usage()
        # A usage error is a failure: report it through the exit status.
        sys.exit(1)

    wtpath = sys.argv[1]
    filename = sys.argv[2]
    dump_mdb_file(wtpath, filename)