diff options
author | Mindaugas Malinauskas <mindaugas.malinauskas@mongodb.com> | 2020-09-24 09:55:23 +0100 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-10-04 22:27:06 +0000 |
commit | cc163fc8eba89c87a9cabc7d3dbb2ab41887ccfc (patch) | |
tree | 07fe9f0ebf910ed57672b371323e240c2c9cbf6a | |
parent | 5726924e5c92e019a1730afb8b0724f0cb0e3473 (diff) | |
download | mongo-cc163fc8eba89c87a9cabc7d3dbb2ab41887ccfc.tar.gz |
SERVER-51120 Find queries with MERGE_SORT incorrectly sort the results when the collation is specified
(cherry picked from commit eee7fb8f2c6da144e9d4c3df7887a5ec167f3a6f)
-rw-r--r-- | jstests/core/explode_for_sort_collation.js | 217 |
-rw-r--r-- | jstests/core/merge_sort_collation.js | 499 |
-rw-r--r-- | jstests/libs/parallelTester.js | 2 |
-rw-r--r-- | src/mongo/db/exec/merge_sort.cpp | 43 |
-rw-r--r-- | src/mongo/db/exec/merge_sort.h | 3 |
-rw-r--r-- | src/mongo/dbtests/query_stage_merge_sort.cpp | 19 |
6 files changed, 556 insertions, 227 deletions
diff --git a/jstests/core/explode_for_sort_collation.js b/jstests/core/explode_for_sort_collation.js deleted file mode 100644 index 2329f6b7987..00000000000 --- a/jstests/core/explode_for_sort_collation.js +++ /dev/null @@ -1,217 +0,0 @@ -/** - * Tests explode for sort query planner behavior with collated queries and indexes. This is a test - * for SERVER-48993. - * @tags: [requires_find_command] - */ -(function() { - "use strict"; - - const testDB = db.getSiblingDB(jsTestName()); - - // Drop the test database. - assert.commandWorked(testDB.dropDatabase()); - - const coll = testDB.explode_for_sort; - - // Executes a test case that creates an index, inserts documents, issues a find query on a - // collection and compares the results with the expected collection. - function executeQueryTestCase(testCase) { - jsTestLog(tojson(testCase)); - - // Create a collection. - coll.drop(); - assert.commandWorked(testDB.createCollection(coll.getName())); - - // Create an index. - assert.commandWorked(coll.createIndex(testCase.indexKeyPattern, testCase.indexOptions)); - - // Insert some documents into the collection. - assert.commandWorked(coll.insert(testCase.inputDocuments)); - - // Run a find query with optionally specified collation. - let cursor = coll.find(testCase.filter).sort(testCase.sort); - if (testCase.findCollation !== undefined) { - cursor = cursor.collation(testCase.findCollation); - } - const actualResults = cursor.toArray(); - - // Compare results to expected. - assert.eq(actualResults, testCase.expectedResults); - } - - const standardInputDocuments = - [{_id: 0, a: 0, b: "CC"}, {_id: 1, a: 0, b: "AA"}, {_id: 2, a: 0, b: "bb"}]; - - const testCases = [ - { - // Verifies that a non-collatable point-query on the prefix of the index key together with - // a - // sort on a suffix of the index key returns correct results when the index is a compound - // index with a non-simple collation and the query does not have an explicit collation. 
- indexKeyPattern: {a: 1, b: 1}, - indexOptions: {collation: {locale: "en_US", strength: 1}}, - filter: {a: 0}, - sort: {b: 1}, - inputDocuments: standardInputDocuments, - expectedResults: - [{_id: 1, a: 0, b: "AA"}, {_id: 0, a: 0, b: "CC"}, {_id: 2, a: 0, b: "bb"}] - }, - { - // Verifies that a non-collatable point-query on the prefix of the index key together with - // a - // sort on a suffix of the index key returns correct results when the index is a compound - // index with a non-simple collation and the query explicitly specifies the simple - // collation. - indexKeyPattern: {a: 1, b: 1}, - indexOptions: {collation: {locale: "en_US", strength: 1}}, - filter: {a: 0}, - sort: {b: 1}, - findCollation: {locale: "simple"}, - inputDocuments: standardInputDocuments, - expectedResults: - [{_id: 1, a: 0, b: "AA"}, {_id: 0, a: 0, b: "CC"}, {_id: 2, a: 0, b: "bb"}] - }, - { - // Verifies that a non-collatable point-query on the prefix of the index key together with - // a - // sort on a suffix of the index key returns correct results when the index is a compound - // index with a simple collation and the query explicitly specifies a non-simple - // collation. - indexKeyPattern: {a: 1, b: 1}, - filter: {a: 0}, - sort: {b: 1}, - findCollation: {locale: "en_US", strength: 1}, - inputDocuments: standardInputDocuments, - expectedResults: - [{_id: 1, a: 0, b: "AA"}, {_id: 2, a: 0, b: "bb"}, {_id: 0, a: 0, b: "CC"}] - }, - { - // Verifies that a non-collatable point-query on the prefix of the index key together with - // a - // sort on a suffix of the index key returns correct results when the index is a compound - // index with a simple collation and the query explicitly specifies a non-simple - // collation. 
- indexKeyPattern: {a: 1, b: 1}, - indexOptions: {collation: {locale: "simple"}}, - filter: {a: 0}, - sort: {b: 1}, - findCollation: {locale: "en_US", strength: 1}, - inputDocuments: standardInputDocuments, - expectedResults: - [{_id: 1, a: 0, b: "AA"}, {_id: 2, a: 0, b: "bb"}, {_id: 0, a: 0, b: "CC"}] - }, - { - // Verifies that a non-collatable point-query on the prefix of the index key together with - // a - // sort on a suffix of the index key returns correct results when the index is a compound - // index with a non-simple collation that is different from the query's. - indexKeyPattern: {a: 1, b: 1}, - indexOptions: {collation: {locale: "en_US", strength: 5}}, - filter: {a: 0}, - sort: {b: 1}, - findCollation: {locale: "en_US", strength: 1}, - inputDocuments: standardInputDocuments, - expectedResults: - [{_id: 1, a: 0, b: "AA"}, {_id: 2, a: 0, b: "bb"}, {_id: 0, a: 0, b: "CC"}] - }, - { - // Verifies that a non-collatable point-query on the prefix of the index key, a collatable - // range-query on the suffix, and a sort on the suffix of the index key returns correct - // results when the index is a compound index with a non-simple collation and the query - // does - // not have an explicit collation. - indexKeyPattern: {a: 1, b: 1}, - indexOptions: {collation: {locale: "en_US", strength: 1}}, - filter: {a: 0, b: {$gte: 'A', $lt: 'D'}}, - sort: {b: 1}, - inputDocuments: standardInputDocuments, - expectedResults: [{_id: 1, a: 0, b: "AA"}, {_id: 0, a: 0, b: "CC"}] - }, - { - // Verifies that a non-collatable point-query on the prefix of the index key, a collatable - // range-query and a sort on a prefix of a suffix of the index key returns correct results - // when the index is a compound index with a non-simple collation and the query does not - // have an explicit collation. 
- indexKeyPattern: {a: 1, b: 1, c: 1}, - indexOptions: {collation: {locale: "en_US", strength: 1}}, - filter: {a: 0, b: {$gte: 'A', $lt: 'D'}}, - sort: {b: 1}, - inputDocuments: standardInputDocuments, - expectedResults: [{_id: 1, a: 0, b: "AA"}, {_id: 0, a: 0, b: "CC"}] - }, - { - // Verifies that a non-collatable multi-point query on the prefix of the index key, a - // collatable range-query on the suffix, and a sort on the suffix of the index key returns - // correct results when the index is a compound index with a non-simple collation and the - // query does not have an explicit collation. - indexKeyPattern: {a: 1, b: 1}, - indexOptions: {collation: {locale: "en_US", strength: 1}}, - filter: {a: {$in: [0, 2]}, b: {$gte: 'A', $lt: 'D'}}, - sort: {b: 1}, - inputDocuments: [ - {_id: 0, a: 0, b: "CC"}, - {_id: 1, a: 0, b: "AA"}, - {_id: 2, a: 0, b: "bb"}, - {_id: 3, a: 2, b: "BB"} - ], - expectedResults: - [{_id: 1, a: 0, b: "AA"}, {_id: 3, a: 2, b: "BB"}, {_id: 0, a: 0, b: "CC"}] - }, - { - // Verifies that a non-collatable multi-point query on the prefix of the index key, a - // non-collatable range-query on the suffix, and a sort on the suffix of the index key - // returns correct results when the index is a compound index with a non-simple collation - // and the query does not have an explicit collation. 
- indexKeyPattern: {a: 1, b: 1}, - indexOptions: {collation: {locale: "en_US", strength: 1}}, - filter: {a: {$in: [0, 2]}, b: {$gte: 0, $lt: 10}}, - sort: {b: 1}, - inputDocuments: [ - {_id: 0, a: 0, b: 6}, - {_id: 1, a: 0, b: 10}, - {_id: 2, a: 0, b: "bb"}, - {_id: 3, a: 2, b: 5}, - {_id: 4, a: 2, b: 4} - ], - expectedResults: [{_id: 4, a: 2, b: 4}, {_id: 3, a: 2, b: 5}, {_id: 0, a: 0, b: 6}] - }, - { - // Verifies that a non-collatable multi-point query on the prefix of the index key, a - // non-collatable range-query on the suffix, and a sort on the suffix of the index key - // returns correct results when the index is a compound index with a simple collation - // and the query explicitly specifies a non-simple collation. - indexKeyPattern: {a: 1, b: 1}, - indexOptions: {collation: {locale: "simple"}}, - filter: {a: {$in: [0, 2]}, b: {$gte: 0, $lt: 10}}, - sort: {b: 1}, - findCollation: {locale: "en_US", strength: 1}, - inputDocuments: [ - {_id: 0, a: 0, b: 6}, - {_id: 1, a: 0, b: 10}, - {_id: 2, a: 0, b: "bb"}, - {_id: 3, a: 2, b: 5}, - {_id: 4, a: 2, b: 4} - ], - expectedResults: [{_id: 4, a: 2, b: 4}, {_id: 3, a: 2, b: 5}, {_id: 0, a: 0, b: 6}] - }, - { - // Verifies that a non-collatable point-query on the prefix of the index key, a - // non-collatable range-query on the suffix, and a sort on the suffix of the index key - // returns correct results when the index is a compound index with a non-simple collation - // and the query does not have an explicit collation. - indexKeyPattern: {a: 1, b: 1}, - indexOptions: {collation: {locale: "en_US", strength: 1}}, - filter: {a: 0, b: {$gte: 0, $lt: 10}}, - sort: {b: 1}, - inputDocuments: [ - {_id: 0, a: 0, b: 6}, - {_id: 1, a: 0, b: 5}, - {_id: 2, a: 0, b: "bb"}, - {_id: 3, a: 0, b: 4} - ], - expectedResults: [{_id: 3, a: 0, b: 4}, {_id: 1, a: 0, b: 5}, {_id: 0, a: 0, b: 6}] - } - ]; - - testCases.forEach(executeQueryTestCase); -}());
\ No newline at end of file diff --git a/jstests/core/merge_sort_collation.js b/jstests/core/merge_sort_collation.js new file mode 100644 index 00000000000..8ebbaf8da70 --- /dev/null +++ b/jstests/core/merge_sort_collation.js @@ -0,0 +1,499 @@ +/** + * Tests following query related aspects involving collated queries and indexes: + * 1) explode for sort query planner behavior related to the selection of MERGE_SORT stage; + * 2) MERGE_SORT stage execution. + * @tags: [ + * requires_find_command, + * ] + */ +(function() { + "use strict"; + + const testDB = db.getSiblingDB(jsTestName()); + const coll = testDB.explode_for_sort; + + // Executes a test case that creates a collection and indexes, inserts documents, issues a find + // query on a collection and compares the results with the expected collection. + function executeQueryTestCase(testCase) { + jsTestLog(tojson(testCase)); + + // Drop the test database. + assert.commandWorked(testDB.dropDatabase()); + + // Create a collection. + const collectionOptions = {}; + if (testCase.collectionCollation !== undefined) { + collectionOptions.collation = testCase.collectionCollation; + } + assert.commandWorked(testDB.createCollection(coll.getName(), collectionOptions)); + + // Create index(es). + if (testCase.indexes !== undefined) { + assert.commandWorked(coll.createIndexes(testCase.indexes, testCase.indexOptions)); + } + + // Insert some documents into the collection. + assert.commandWorked(coll.insert(testCase.inputDocuments)); + + // Run a find query with optionally specified collation and projection. + let projection = {}; + if (testCase.projection !== undefined) { + projection = testCase.projection; + } + let cursor = coll.find(testCase.filter, projection).sort(testCase.sort); + if (testCase.findCollation !== undefined) { + cursor = cursor.collation(testCase.findCollation); + } + const actualResults = cursor.toArray(); + + // Compare results to expected. 
+ assert.eq(actualResults, testCase.expectedResults); + } + + const standardInputDocuments = + [{_id: 0, a: 0, b: "CC"}, {_id: 1, a: 0, b: "AA"}, {_id: 2, a: 0, b: "bb"}]; + + const testCases = [ + { + // Verifies that a non-collatable point-query on the prefix of the index key together with + // a + // sort on a suffix of the index key returns correct results when the index is a compound + // index with a non-simple collation and the query does not have an explicit collation. + indexes: [{a: 1, b: 1}], + indexOptions: {collation: {locale: "en_US", strength: 1}}, + filter: {a: 0}, + sort: {b: 1}, + inputDocuments: standardInputDocuments, + expectedResults: + [{_id: 1, a: 0, b: "AA"}, {_id: 0, a: 0, b: "CC"}, {_id: 2, a: 0, b: "bb"}] + }, + { + // Verifies that a non-collatable point-query on the prefix of the index key together with + // a + // sort on a suffix of the index key returns correct results when the index is a compound + // index with a non-simple collation and the query explicitly specifies the simple + // collation. + indexes: [{a: 1, b: 1}], + indexOptions: {collation: {locale: "en_US", strength: 1}}, + filter: {a: 0}, + sort: {b: 1}, + findCollation: {locale: "simple"}, + inputDocuments: standardInputDocuments, + expectedResults: + [{_id: 1, a: 0, b: "AA"}, {_id: 0, a: 0, b: "CC"}, {_id: 2, a: 0, b: "bb"}] + }, + { + // Verifies that a non-collatable point-query on the prefix of the index key together with + // a + // sort on a suffix of the index key returns correct results when the index is a compound + // index with a simple collation and the query explicitly specifies a non-simple + // collation. 
+ indexes: [{a: 1, b: 1}], + filter: {a: 0}, + sort: {b: 1}, + findCollation: {locale: "en_US", strength: 1}, + inputDocuments: standardInputDocuments, + expectedResults: + [{_id: 1, a: 0, b: "AA"}, {_id: 2, a: 0, b: "bb"}, {_id: 0, a: 0, b: "CC"}] + }, + { + // Verifies that a non-collatable point-query on the prefix of the index key together with + // a + // sort on a suffix of the index key returns correct results when the index is a compound + // index with a simple collation and the query explicitly specifies a non-simple + // collation. + indexes: [{a: 1, b: 1}], + indexOptions: {collation: {locale: "simple"}}, + filter: {a: 0}, + sort: {b: 1}, + findCollation: {locale: "en_US", strength: 1}, + inputDocuments: standardInputDocuments, + expectedResults: + [{_id: 1, a: 0, b: "AA"}, {_id: 2, a: 0, b: "bb"}, {_id: 0, a: 0, b: "CC"}] + }, + { + // Verifies that a non-collatable point-query on the prefix of the index key together with + // a + // sort on a suffix of the index key returns correct results when the index is a compound + // index with a non-simple collation that is different from the query's. + indexes: [{a: 1, b: 1}], + indexOptions: {collation: {locale: "en_US", strength: 5}}, + filter: {a: 0}, + sort: {b: 1}, + findCollation: {locale: "en_US", strength: 1}, + inputDocuments: standardInputDocuments, + expectedResults: + [{_id: 1, a: 0, b: "AA"}, {_id: 2, a: 0, b: "bb"}, {_id: 0, a: 0, b: "CC"}] + }, + { + // Verifies that a non-collatable point-query on the prefix of the index key, a collatable + // range-query on the suffix, and a sort on the suffix of the index key returns correct + // results when the index is a compound index with a non-simple collation and the query + // does + // not have an explicit collation. 
+ indexes: [{a: 1, b: 1}], + indexOptions: {collation: {locale: "en_US", strength: 1}}, + filter: {a: 0, b: {$gte: 'A', $lt: 'D'}}, + sort: {b: 1}, + inputDocuments: standardInputDocuments, + expectedResults: [{_id: 1, a: 0, b: "AA"}, {_id: 0, a: 0, b: "CC"}] + }, + { + // Verifies that a non-collatable point-query on the prefix of the index key, a collatable + // range-query and a sort on a prefix of a suffix of the index key returns correct results + // when the index is a compound index with a non-simple collation and the query does not + // have an explicit collation. + indexes: [{a: 1, b: 1, c: 1}], + indexOptions: {collation: {locale: "en_US", strength: 1}}, + filter: {a: 0, b: {$gte: 'A', $lt: 'D'}}, + sort: {b: 1}, + inputDocuments: standardInputDocuments, + expectedResults: [{_id: 1, a: 0, b: "AA"}, {_id: 0, a: 0, b: "CC"}] + }, + { + // Verifies that a non-collatable multi-point query on the prefix of the index key, a + // collatable range-query on the suffix, and a sort on the suffix of the index key returns + // correct results when the index is a compound index with a non-simple collation and the + // query does not have an explicit collation. + indexes: [{a: 1, b: 1}], + indexOptions: {collation: {locale: "en_US", strength: 1}}, + filter: {a: {$in: [0, 2]}, b: {$gte: 'A', $lt: 'D'}}, + sort: {b: 1}, + inputDocuments: [ + {_id: 0, a: 0, b: "CC"}, + {_id: 1, a: 0, b: "AA"}, + {_id: 2, a: 0, b: "bb"}, + {_id: 3, a: 2, b: "BB"} + ], + expectedResults: + [{_id: 1, a: 0, b: "AA"}, {_id: 3, a: 2, b: "BB"}, {_id: 0, a: 0, b: "CC"}] + }, + { + // Verifies that a non-collatable multi-point query on the prefix of the index key, a + // non-collatable range-query on the suffix, and a sort on the suffix of the index key + // returns correct results when the index is a compound index with a non-simple collation + // and the query does not have an explicit collation. 
+ indexes: [{a: 1, b: 1}], + indexOptions: {collation: {locale: "en_US", strength: 1}}, + filter: {a: {$in: [0, 2]}, b: {$gte: 0, $lt: 10}}, + sort: {b: 1}, + inputDocuments: [ + {_id: 0, a: 0, b: 6}, + {_id: 1, a: 0, b: 10}, + {_id: 2, a: 0, b: "bb"}, + {_id: 3, a: 2, b: 5}, + {_id: 4, a: 2, b: 4} + ], + expectedResults: [{_id: 4, a: 2, b: 4}, {_id: 3, a: 2, b: 5}, {_id: 0, a: 0, b: 6}] + }, + { + // Verifies that a non-collatable multi-point query on the prefix of the index key, a + // non-collatable range-query on the suffix, and a sort on the suffix of the index key + // returns correct results when the index is a compound index with a simple collation + // and the query explicitly specifies a non-simple collation. + indexes: [{a: 1, b: 1}], + indexOptions: {collation: {locale: "simple"}}, + filter: {a: {$in: [0, 2]}, b: {$gte: 0, $lt: 10}}, + sort: {b: 1}, + findCollation: {locale: "en_US", strength: 1}, + inputDocuments: [ + {_id: 0, a: 0, b: 6}, + {_id: 1, a: 0, b: 10}, + {_id: 2, a: 0, b: "bb"}, + {_id: 3, a: 2, b: 5}, + {_id: 4, a: 2, b: 4} + ], + expectedResults: [{_id: 4, a: 2, b: 4}, {_id: 3, a: 2, b: 5}, {_id: 0, a: 0, b: 6}] + }, + { + // Verifies that a non-collatable point-query on the prefix of the index key, a + // non-collatable range-query on the suffix, and a sort on the suffix of the index key + // returns correct results when the index is a compound index with a non-simple collation + // and the query does not have an explicit collation. 
+ indexes: [{a: 1, b: 1}], + indexOptions: {collation: {locale: "en_US", strength: 1}}, + filter: {a: 0, b: {$gte: 0, $lt: 10}}, + sort: {b: 1}, + inputDocuments: [ + {_id: 0, a: 0, b: 6}, + {_id: 1, a: 0, b: 5}, + {_id: 2, a: 0, b: "bb"}, + {_id: 3, a: 0, b: 4} + ], + expectedResults: [{_id: 3, a: 0, b: 4}, {_id: 1, a: 0, b: 5}, {_id: 0, a: 0, b: 6}] + }, + { + // Verifies that a collatable multi-point query on the prefix of the index key, and a sort + // on the suffix of the index key returns correct results when the index is a compound + // index + // with a non-simple collation and the query has a collation specified matching collation + // of + // the index. + indexes: [{a: 1, b: 1}], + indexOptions: {collation: {locale: "en", strength: 2}}, + filter: {a: {$in: ["1", "2"]}}, + sort: {b: 1}, + findCollation: {locale: "en", strength: 2}, + inputDocuments: [ + {_id: 0, a: "1", b: "a"}, + {_id: 1, a: "1", b: "c"}, + {_id: 2, a: "2", b: "b"}, + {_id: 3, a: "2", b: "d"} + ], + expectedResults: [ + {_id: 0, a: "1", b: "a"}, + {_id: 2, a: "2", b: "b"}, + {_id: 1, a: "1", b: "c"}, + {_id: 3, a: "2", b: "d"} + ] + }, + { + // Verifies that a collatable multi-point query on the prefix of the index key, and a sort + // on a prefix of a suffix of the index key returns correct results when the index is a + // compound index with a non-simple collation and the query has a collation specified + // matching collation of the index. 
+ indexes: [{a: 1, b: 1, c: 1, d: 1}], + indexOptions: {collation: {locale: "en", strength: 2}}, + filter: {a: {$in: ["1", "2"]}, b: "1"}, + sort: {c: 1}, + findCollation: {locale: "en", strength: 2}, + inputDocuments: [ + {_id: 0, a: "1", b: "1", c: "a"}, + {_id: 1, a: "1", b: "1", c: "c"}, + {_id: 2, a: "2", b: "1", c: "b"}, + {_id: 3, a: "2", b: "1", c: "u", d: "a"} + ], + expectedResults: [ + {_id: 0, a: "1", b: "1", c: "a"}, + {_id: 2, a: "2", b: "1", c: "b"}, + {_id: 1, a: "1", b: "1", c: "c"}, + {_id: 3, a: "2", b: "1", c: "u", d: "a"} + ] + }, + { + // Verifies that a collatable $or query on the prefixes of the keys of 3 indexes, and a + // sort + // on a suffix of the keys of indexes returns correct results when the indexes are + // compound + // indexes with a non-simple collation and the query has a collation specified matching + // collation of the indexes. Also, the second/third operands to $or queries on fields + // 'e'/'g' that are not covered by the indexes therefore triggers addition of a FETCH + // stage + // between MERGE_SORT and IXSCAN. This tests comparison of index versus fetched document + // provided sort keys. In addition to that, some documents have objects as sort attribute + // values. 
+ indexes: [{a: 1, b: 1, c: 1}, {d: 1, c: 1}, {f: 1, c: 1}], + indexOptions: {collation: {locale: "en", strength: 2}}, + filter: {$or: [{a: {$in: ["1", "2"]}, b: "1"}, {d: "3", e: "3"}, {f: "4", g: "3"}]}, + sort: {c: 1}, + findCollation: {locale: "en", strength: 2}, + inputDocuments: [ + {_id: 0, a: "1", b: "1", c: "a"}, + {_id: 1, a: "1", b: "1", c: "d"}, + {_id: 2, a: "2", b: "1", c: "b"}, + {_id: 3, a: "2", b: "1", c: "e"}, + {_id: 6, a: "2", b: "1", c: {a: "B"}}, + {_id: 4, d: "3", e: "3", c: "c"}, + {_id: 5, d: "3", e: "3", c: "f"}, + {_id: 7, d: "3", e: "3", c: {a: "A"}}, + {_id: 8, d: "3", e: "3", c: {a: "C"}}, + {_id: 9, f: "4", g: "3", c: "g"}, + ], + expectedResults: [ + {_id: 0, a: "1", b: "1", c: "a"}, + {_id: 2, a: "2", b: "1", c: "b"}, + {_id: 4, d: "3", e: "3", c: "c"}, + {_id: 1, a: "1", b: "1", c: "d"}, + {_id: 3, a: "2", b: "1", c: "e"}, + {_id: 5, d: "3", e: "3", c: "f"}, + {_id: 9, f: "4", g: "3", c: "g"}, + {_id: 7, d: "3", e: "3", c: {a: "A"}}, + {_id: 6, a: "2", b: "1", c: {a: "B"}}, + {_id: 8, d: "3", e: "3", c: {a: "C"}}, + ] + }, + { + // Verifies that a multi-point query on the prefix of the index key, and a sort on the + // suffix of the index key returns correct results when the collection has a non-simple + // collation specified and the index is a compound index. 
+ collectionCollation: {locale: "en", strength: 2}, + indexes: [{a: 1, b: 1, c: 1, d: 1}], + filter: {a: {$in: ["1", "2"]}, b: "1"}, + sort: {c: 1}, + inputDocuments: [ + {_id: 0, a: "1", b: "1", c: "a"}, + {_id: 1, a: "1", b: "1", c: "c"}, + {_id: 2, a: "2", b: "1", c: "b"}, + {_id: 3, a: "2", b: "1", c: "d"} + ], + expectedResults: [ + {_id: 0, a: "1", b: "1", c: "a"}, + {_id: 2, a: "2", b: "1", c: "b"}, + {_id: 1, a: "1", b: "1", c: "c"}, + {_id: 3, a: "2", b: "1", c: "d"} + ] + }, + { + // Verifies that a multi-point query on the prefix of the index key, and a sort on the + // suffix of the index key returns correct results when the collection has no collation + // specified and the index is a compound index. + indexes: [{a: 1, b: 1, c: 1, d: 1}], + filter: {a: {$in: ["1", "2"]}, b: "1"}, + sort: {c: 1}, + inputDocuments: [ + {_id: 0, a: "1", b: "1", c: "a"}, + {_id: 1, a: "1", b: "1", c: "c"}, + {_id: 2, a: "2", b: "1", c: "b"}, + {_id: 3, a: "2", b: "1", c: "d"} + ], + expectedResults: [ + {_id: 0, a: "1", b: "1", c: "a"}, + {_id: 2, a: "2", b: "1", c: "b"}, + {_id: 1, a: "1", b: "1", c: "c"}, + {_id: 3, a: "2", b: "1", c: "d"} + ] + }, + { + // Verifies that an $or query on the prefixes of index keys, and a sort on the suffix of + // the + // index keys returns correct results when the collection and the query have the same + // non-simple collation specified. 
+ indexes: [{a: 1, c: 1}, {b: 1, c: 1}], + indexOptions: {collation: {locale: "en", strength: 2}}, + filter: {$or: [{a: "1"}, {b: "2"}]}, + sort: {c: 1}, + findCollation: {locale: "en", strength: 2}, + inputDocuments: [ + {_id: 0, a: "1", c: "a"}, + {_id: 1, a: "1", c: "c"}, + {_id: 2, b: "2", c: "b"}, + {_id: 3, b: "2", c: "d"} + ], + expectedResults: [ + {_id: 0, a: "1", c: "a"}, + {_id: 2, b: "2", c: "b"}, + {_id: 1, a: "1", c: "c"}, + {_id: 3, b: "2", c: "d"} + ] + }, + { + // Verifies that an $or query on the prefixes of index keys, and a sort on the suffix of + // the + // index keys returns correct results when the collection and the query have the same + // non-simple collation specified and one $or operand requires a FETCH. + indexes: [{a: 1, c: 1}, {b: 1, c: 1}], + indexOptions: {collation: {locale: "en", strength: 2}}, + filter: {$or: [{a: "1"}, {b: "2", d: "3"}]}, + sort: {c: 1}, + findCollation: {locale: "en", strength: 2}, + inputDocuments: [ + {_id: 0, a: "1", c: "a"}, + {_id: 1, a: "1", c: "c"}, + {_id: 2, b: "2", c: "b", d: "3"}, + {_id: 3, b: "2", c: "d", d: "3"} + ], + expectedResults: [ + {_id: 0, a: "1", c: "a"}, + {_id: 2, b: "2", c: "b", d: "3"}, + {_id: 1, a: "1", c: "c"}, + {_id: 3, b: "2", c: "d", d: "3"} + ] + }, + { + // Verifies that a non-collatable multi-point query on the prefix of the index key, and a + // collatable sort on the suffix of the index key returns correct results when the index + // is + // a compound index with a non-simple collation and the query has a collation specified + // matching collation of the index. 
+ indexes: [{a: 1, b: 1}], + indexOptions: {collation: {locale: "en", strength: 2}}, + filter: {a: {$in: [1, 2]}}, + sort: {b: 1}, + findCollation: {locale: "en", strength: 2}, + inputDocuments: [ + {_id: 0, a: 1, b: "a"}, + {_id: 1, a: 1, b: "c"}, + {_id: 2, a: 2, b: "b"}, + {_id: 3, a: 2, b: "d"} + ], + expectedResults: [ + {_id: 0, a: 1, b: "a"}, + {_id: 2, a: 2, b: "b"}, + {_id: 1, a: 1, b: "c"}, + {_id: 3, a: 2, b: "d"} + ] + }, + { + // Verifies that a non-collatable $or query on the prefixes of the index keys, and a sort + // on + // suffixes of the index keys returns correct results when the index is a compound index + // with a non-simple collation that is different from the collation of the query. + indexes: [{a: 1, c: 1}, {b: 1, c: 1}], + indexOptions: {collation: {locale: "fr"}}, + filter: {$or: [{a: 1, c: 1}, {b: 2, c: 2}, {b: 2, c: 3}, {b: 2, c: 4}]}, + sort: {c: 1}, + findCollation: {locale: "en", strength: 2}, + inputDocuments: [ + {_id: 4, b: 2, c: 4}, + {_id: 2, b: 2, c: 2}, + {_id: 0, a: 1, c: 1}, + {_id: 3, b: 2, c: 3}, + ], + expectedResults: [ + {_id: 0, a: 1, c: 1}, + {_id: 2, b: 2, c: 2}, + {_id: 3, b: 2, c: 3}, + {_id: 4, b: 2, c: 4}, + ] + }, + { + // Verifies that a fully-index-covered non-collatable multi-point query on a prefix of an + // index key, and a sort on a suffix of an index key returns correct results when the + // index + // is a compound index. 
+ indexes: [{a: 1, c: 1}], + filter: {a: {$in: [1, 2]}}, + projection: {_id: 0, a: 1, c: 1}, + sort: {c: 1}, + inputDocuments: [ + {_id: 0, a: 1, c: 5}, + {_id: 1, a: 1, c: 3}, + {_id: 2, a: 2, c: 1}, + {_id: 3, a: 2, c: 4}, + ], + expectedResults: [ + {a: 2, c: 1}, + {a: 1, c: 3}, + {a: 2, c: 4}, + {a: 1, c: 5}, + ] + }, + { + // Verifies that a non-collatable multi-point query on a prefix of an index key, and a + // collatable sort on a suffix of an index key returns correct results when the index is a + // compound index with a collation specified that matches a collation of the query. The + // query would be eligible to be covered by the index due to a projection, but requires a + // FETCH because index bounds include strings that are encoded as collated keys. + indexes: [{a: 1, c: 1}], + indexOptions: {collation: {locale: "en", strength: 2}}, + filter: {a: {$in: [1, 2]}}, + projection: {_id: 0, a: 1, c: 1}, + sort: {c: 1}, + findCollation: {locale: "en", strength: 2}, + inputDocuments: [ + {_id: 0, a: 1, c: "a"}, + {_id: 1, a: 1, c: "c"}, + {_id: 2, a: 2, c: "b"}, + {_id: 3, a: 2, c: "d"}, + ], + expectedResults: [ + {a: 1, c: "a"}, + {a: 2, c: "b"}, + {a: 1, c: "c"}, + {a: 2, c: "d"}, + ] + }, + ]; + + testCases.forEach(executeQueryTestCase); +}());
\ No newline at end of file diff --git a/jstests/libs/parallelTester.js b/jstests/libs/parallelTester.js index 87e8fa65a28..870e97193a0 100644 --- a/jstests/libs/parallelTester.js +++ b/jstests/libs/parallelTester.js @@ -237,7 +237,7 @@ if (typeof _threadInject != "undefined") { // The following tests cannot run when shell readMode is legacy. if (db.getMongo().readMode() === "legacy") { var requires_find_command = [ - "explode_for_sort_collation.js", + "merge_sort_collation.js", "views/views_aggregation.js", "views/views_change.js", "views/views_drop.js", diff --git a/src/mongo/db/exec/merge_sort.cpp b/src/mongo/db/exec/merge_sort.cpp index 7a840617566..a7aceb5aca4 100644 --- a/src/mongo/db/exec/merge_sort.cpp +++ b/src/mongo/db/exec/merge_sort.cpp @@ -33,6 +33,7 @@ #include "mongo/db/exec/scoped_timer.h" #include "mongo/db/exec/working_set.h" #include "mongo/db/exec/working_set_common.h" +#include "mongo/db/query/collation/collation_index_key.h" #include "mongo/db/query/collation/collator_interface.h" #include "mongo/stdx/memory.h" #include "mongo/util/mongoutils/str.h" @@ -206,11 +207,44 @@ bool MergeSortStage::StageWithValueComparison::operator()(const MergingRef& lhs, BSONElement lhsElt; verify(lhsMember->getFieldDotted(fn, &lhsElt)); + // Determine if the left-hand side sort key part comes from an index key. + auto lhsIsFromIndexKey = !lhsMember->hasObj(); + BSONElement rhsElt; verify(rhsMember->getFieldDotted(fn, &rhsElt)); + // Determine if the right-hand side sort key part comes from an index key. + auto rhsIsFromIndexKey = !rhsMember->hasObj(); + + // A collator to use for comparing the sort keys. We need a collator when values of both + // operands are supplied from a document and the query is collated. Otherwise bit-wise + // comparison should be used. 
+ const CollatorInterface* collatorToUse = nullptr; + BSONObj collationEncodedKeyPart; // A backing storage for a collation-encoded key part + // (according to collator '_collator') of one of the + // operands - either 'lhsElt' or 'rhsElt'. + + if (nullptr == _collator || (lhsIsFromIndexKey && rhsIsFromIndexKey)) { + // Either the query has no collation or both sort key parts come directly from index + // keys. If the query has no collation, then the query planner should have guaranteed + // that we don't need to perform any collation-aware comparisons here. If both sort key + // parts come from index keys, we may need to respect a collation but the index keys are + // already collation-encoded, therefore we don't need to perform a collation-aware + // comparison here. + } else if (!lhsIsFromIndexKey && !rhsIsFromIndexKey) { + // Both sort key parts were extracted from fetched documents. These parts are not + // collation-encoded, so we will need to perform a collation-aware comparison. + collatorToUse = _collator; + } else { + // One of the sort key parts was extracted from fetched documents. Encode that part + // using the query's collation. + auto& keyPartFetchedFromDocument = rhsIsFromIndexKey ? lhsElt : rhsElt; + collationEncodedKeyPart = encodeKeyPartWithCollation(keyPartFetchedFromDocument); + keyPartFetchedFromDocument = collationEncodedKeyPart.firstElement(); + } + // false means don't compare field name. 
- int x = lhsElt.woCompare(rhsElt, false, _collator); + int x = lhsElt.woCompare(rhsElt, false, collatorToUse); if (-1 == patternElt.number()) { x = -x; } @@ -225,6 +259,13 @@ bool MergeSortStage::StageWithValueComparison::operator()(const MergingRef& lhs, return false; } +BSONObj MergeSortStage::StageWithValueComparison::encodeKeyPartWithCollation( + const BSONElement& keyPart) { + BSONObjBuilder objectBuilder; + CollationIndexKey::collationAwareIndexKeyAppend(keyPart, _collator, &objectBuilder); + return objectBuilder.obj(); +} + unique_ptr<PlanStageStats> MergeSortStage::getStats() { _commonStats.isEOF = isEOF(); diff --git a/src/mongo/db/exec/merge_sort.h b/src/mongo/db/exec/merge_sort.h index e4d2e9af190..1d1fe200fb0 100644 --- a/src/mongo/db/exec/merge_sort.h +++ b/src/mongo/db/exec/merge_sort.h @@ -137,6 +137,9 @@ private: bool operator()(const MergingRef& lhs, const MergingRef& rhs); private: + // Encodes sort key part 'keyPart' according to the collation of the query. + BSONObj encodeKeyPartWithCollation(const BSONElement& keyPart); + WorkingSet* _ws; BSONObj _pattern; const CollatorInterface* _collator; diff --git a/src/mongo/dbtests/query_stage_merge_sort.cpp b/src/mongo/dbtests/query_stage_merge_sort.cpp index 43160272861..e0ce986fdb2 100644 --- a/src/mongo/dbtests/query_stage_merge_sort.cpp +++ b/src/mongo/dbtests/query_stage_merge_sort.cpp @@ -851,7 +851,7 @@ public: msparams.pattern = BSON("c" << 1 << "d" << 1); CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); msparams.collator = &collator; - MergeSortStage* ms = new MergeSortStage(&_opCtx, msparams, ws.get(), coll); + auto ms = std::make_unique<MergeSortStage>(&_opCtx, msparams, ws.get(), coll); // a:1 IndexScanParams params; @@ -861,17 +861,20 @@ public: params.bounds.endKey = objWithMaxKey(1); params.bounds.boundInclusion = BoundInclusion::kIncludeBothStartAndEndKeys; params.direction = 1; - ms->addChild(new IndexScan(&_opCtx, params, ws.get(), NULL)); + auto 
idxScan = std::make_unique<IndexScan>(&_opCtx, params, ws.get(), nullptr); + + // Wrap 'idxScan' with a FETCH stage so a document is fetched and MERGE_SORT is forced to + // use the provided collator 'collator'. Also, this permits easier retrieval of result + // objects in the result verification code. + ms->addChild(new FetchStage(&_opCtx, ws.get(), idxScan.release(), nullptr, coll)); // b:1 params.descriptor = getIndex(secondIndex, coll); - ms->addChild(new IndexScan(&_opCtx, params, ws.get(), NULL)); + idxScan = std::make_unique<IndexScan>(&_opCtx, params, ws.get(), nullptr); + ms->addChild(new FetchStage(&_opCtx, ws.get(), idxScan.release(), nullptr, coll)); - unique_ptr<FetchStage> fetchStage = - make_unique<FetchStage>(&_opCtx, ws.get(), ms, nullptr, coll); - // Must fetch if we want to easily pull out an obj. - auto statusWithPlanExecutor = PlanExecutor::make( - &_opCtx, std::move(ws), std::move(fetchStage), coll, PlanExecutor::NO_YIELD); + auto statusWithPlanExecutor = + PlanExecutor::make(&_opCtx, std::move(ws), std::move(ms), coll, PlanExecutor::NO_YIELD); ASSERT_OK(statusWithPlanExecutor.getStatus()); auto exec = std::move(statusWithPlanExecutor.getValue()); |