diff options
author | Kyle Suarez <kyle.suarez@mongodb.com> | 2017-12-21 21:03:30 -0500 |
---|---|---|
committer | Kyle Suarez <kyle.suarez@mongodb.com> | 2017-12-21 21:03:30 -0500 |
commit | 79352e71b697cb8c126510095bba7fd816128701 (patch) | |
tree | 76a1a6113d4187fe137b06da9898a49b38e19eb0 /jstests/aggregation | |
parent | b82d22183aac3443188becfe84329374e475b5dc (diff) | |
download | mongo-79352e71b697cb8c126510095bba7fd816128701.tar.gz |
SERVER-32297, SERVER-32430 fix $sort in-memory sort and $sortKey serialization
Diffstat (limited to 'jstests/aggregation')
-rw-r--r-- | jstests/aggregation/sources/sort/collation_sort_japanese.js | 146 |
1 files changed, 146 insertions, 0 deletions
// File: jstests/aggregation/sources/sort/collation_sort_japanese.js
/**
 * Tests that the $sort stage performs sorts correctly, whether in-memory, merging on mongos, or
 * merging on a shard. (The sharding scenarios are tested when this test is run in the
 * aggregation_sharded_collections_passthrough.)
 */
(function() {
    "use strict";

    // Seed the shell's Random generator. Every randomized choice in this test draws from
    // Random.rand() (not Math.random()) so that the choices are governed by this seed.
    Random.setRandomSeed();
    const coll = db.getCollection("collation_sort_japanese");

    // In Japanese, the order of vowels is a, i, u, e, o. The sorting of mixed katakana and hiragana
    // vowels differs depending on the collation:
    //
    // - With the simple collation, hiragana vowels come first (in order), followed by katakana.
    // - In the Japanese locale, vowels with the same sound sort together. Whether hiragana or
    //   katakana comes first depends on the strength level of the collation.
    const data = [
        {kana: "ア", val: 0, name: "katakana a"},
        {kana: "イ", val: 1, name: "katakana i"},
        {kana: "ウ", val: 2, name: "katakana u"},
        {kana: "エ", val: 3, name: "katakana e"},
        {kana: "オ", val: 4, name: "katakana o"},
        {kana: "あ", val: 5, name: "hiragana a"},
        {kana: "い", val: 6, name: "hiragana i"},
        {kana: "う", val: 7, name: "hiragana u"},
        {kana: "え", val: 8, name: "hiragana e"},
        {kana: "お", val: 9, name: "hiragana o"},
    ];

    const simpleCollation = {locale: "simple"};
    const jaCollationStr3 = {locale: "ja"};
    const jaCollationStr4 = {locale: "ja", strength: 4};

    /**
     * Returns a shallow copy of 'doc' whose "kana" field is replaced by 'kana'. The input document
     * is left untouched so that 'data' can be reused across scenarios.
     */
    function copyWithKana(doc, kana) {
        const copy = Object.extend({}, doc);
        copy.kana = kana;
        return copy;
    }

    /**
     * Returns a shallow copy of 'doc' in which "kana" is wrapped in a singleton array when a draw
     * from the seeded Random generator falls below 0.5, and left as a scalar otherwise.
     */
    function copyWithKanaMaybeInArray(doc) {
        const kana = (Random.rand() < 0.5) ? [doc.kana] : doc.kana;
        return copyWithKana(doc, kana);
    }

    /**
     * Runs a $sort on 'sortOrder' under 'collation' and asserts that the "val" fields of the
     * results appear in the order given by 'expectedVals'. The aggregation is run twice: once
     * as-is, and once behind an $_internalSplitPipeline stage requesting mergeType "anyShard".
     */
    function assertAggregationSortOrder(sortOrder, collation, expectedVals) {
        const expectedDocs = expectedVals.map(val => ({val: val}));
        const failureMsg = "sort returned wrong order with sort pattern " + tojson(sortOrder) +
            " and collation " + tojson(collation);

        const sortAndProject = [{$sort: sortOrder}, {$project: {_id: 0, val: 1}}];
        let result = coll.aggregate(sortAndProject, {collation: collation}).toArray();
        assert.eq(result, expectedDocs, failureMsg);

        // Run the same aggregation, but in a sharded cluster, force the merging to be performed
        // on a shard instead of on mongos.
        const mergeOnShard =
            [{$_internalSplitPipeline: {mergeType: "anyShard"}}].concat(sortAndProject);
        result = coll.aggregate(mergeOnShard, {collation: collation}).toArray();
        assert.eq(result, expectedDocs, failureMsg + " when merging on a shard");
    }

    /**
     * Inserts each doc of 'docs' into the collection in no specified order (unordered bulk write)
     * and then checks the sort order produced under each collation, for both a single-key and a
     * compound sort pattern.
     */
    function runTests(docs) {
        const bulk = coll.initializeUnorderedBulkOp();
        docs.forEach(doc => {
            bulk.insert(doc);
        });
        assert.writeOK(bulk.execute());

        // Start with a sort on a single key.
        const singleKeySort = {kana: 1};

        // With the binary collation, hiragana codepoints sort before katakana codepoints.
        assertAggregationSortOrder(singleKeySort, simpleCollation, [5, 6, 7, 8, 9, 0, 1, 2, 3, 4]);

        // With the Japanese collation at strength 4, a hiragana codepoint always sorts before its
        // equivalent katakana.
        assertAggregationSortOrder(singleKeySort, jaCollationStr4, [5, 0, 6, 1, 7, 2, 8, 3, 9, 4]);

        // Test a sort on a compound key.
        const compoundSort = {kana: 1, val: 1};

        // With the binary collation, hiragana codepoints sort before katakana codepoints.
        assertAggregationSortOrder(compoundSort, simpleCollation, [5, 6, 7, 8, 9, 0, 1, 2, 3, 4]);

        // With the default Japanese collation, hiragana and katakana with the same pronunciation
        // sort together but with no specified order. The compound sort on "val" breaks the tie and
        // puts the katakana first.
        assertAggregationSortOrder(compoundSort, jaCollationStr3, [0, 5, 1, 6, 2, 7, 3, 8, 4, 9]);

        // With the Japanese collation at strength 4, a hiragana codepoint always sorts before its
        // equivalent katakana.
        assertAggregationSortOrder(compoundSort, jaCollationStr4, [5, 0, 6, 1, 7, 2, 8, 3, 9, 4]);
    }

    // Test sorting documents with only scalar values. The result of this first drop is
    // deliberately not asserted, unlike the later drops.
    coll.drop();
    runTests(data);

    // Test sorting documents containing singleton arrays.
    assert(coll.drop());
    runTests(data.map(doc => copyWithKana(doc, [doc.kana])));

    // Test sorting documents containing arrays with multiple elements.
    assert(coll.drop());
    runTests(data.map(doc => copyWithKana(doc, [doc.kana, doc.kana, doc.kana])));

    // Test sorting documents where some values are scalars and others are arrays.
    assert(coll.drop());
    runTests(data.map(copyWithKanaMaybeInArray));

    // Create indexes that provide sorts and assert that the results are equivalent.
    assert(coll.drop());
    assert.commandWorked(
        coll.createIndex({kana: 1}, {name: "k1_jaStr3", collation: jaCollationStr3}));
    assert.commandWorked(
        coll.createIndex({kana: 1}, {name: "k1_jaStr4", collation: jaCollationStr4}));
    assert.commandWorked(
        coll.createIndex({kana: 1, val: 1}, {name: "k1v1_jaStr3", collation: jaCollationStr3}));
    assert.commandWorked(
        coll.createIndex({kana: 1, val: 1}, {name: "k1v1_jaStr4", collation: jaCollationStr4}));
    runTests(data.map(copyWithKanaMaybeInArray));
}());