summaryrefslogtreecommitdiff
path: root/jstests/aggregation/sources/sort/collation_sort_japanese.js
blob: 5bfad05af31570e46d4afa5a63caabdd5bfedda5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
/**
 * Tests that the $sort stage orders Japanese kana correctly under the simple and "ja" collations,
 * whether the sort is performed in-memory, merged on mongos, or merged on a shard. (The sharding
 * scenarios are tested when this test is run in the aggregation_sharded_collections_passthrough.)
 */
(function() {
    "use strict";

    // Initialize the shell's Random helper; no explicit seed is passed in.
    Random.setRandomSeed();
    // Handle to the collection that the rest of this test reads from and writes to.
    const coll = db.getCollection("collation_sort_japanese");

    // Japanese orders its vowels a, i, u, e, o. How a mix of katakana and hiragana vowels sorts
    // depends on the collation in effect:
    //
    //  - Under the simple collation, the hiragana vowels come first (in order), then the katakana.
    //  - Under the Japanese locale, vowels with the same sound are grouped together, and the
    //    strength level of the collation decides whether hiragana or katakana leads each group.
    //
    // Each fixture document carries the kana character, a numeric "val" that the tests use to
    // describe expected orderings, and a human-readable name.
    const makeKanaDoc = (kana, val, name) => ({kana: kana, val: val, name: name});
    const data = [
        makeKanaDoc("ア", 0, "katakana a"),
        makeKanaDoc("イ", 1, "katakana i"),
        makeKanaDoc("ウ", 2, "katakana u"),
        makeKanaDoc("エ", 3, "katakana e"),
        makeKanaDoc("オ", 4, "katakana o"),
        makeKanaDoc("あ", 5, "hiragana a"),
        makeKanaDoc("い", 6, "hiragana i"),
        makeKanaDoc("う", 7, "hiragana u"),
        makeKanaDoc("え", 8, "hiragana e"),
        makeKanaDoc("お", 9, "hiragana o"),
    ];

    // Collation specifications exercised by the tests.
    const simpleCollation = {
        locale: "simple",
    };
    // No explicit strength here, so this one relies on the locale's default strength.
    const jaCollationStr3 = {
        locale: "ja",
    };
    const jaCollationStr4 = {
        locale: "ja",
        strength: 4,
    };

    /**
     * Loads every document of 'docs' into the collection through an unordered bulk write, then
     * checks the document order produced by $sort for a single-key and a compound-key sort
     * pattern under the simple and Japanese collations.
     */
    function runTests(docs) {
        const bulkOp = coll.initializeUnorderedBulkOp();
        for (const doc of docs) {
            bulkOp.insert(doc);
        }
        assert.writeOK(bulkOp.execute());

        // Sorts by 'sortSpec' under 'collation' and asserts that the projected "val" fields come
        // back in the order listed in 'expectedVals'. The check is made twice: once with the
        // plain pipeline, and once with a leading $_internalSplitPipeline stage so that, in a
        // sharded cluster, the merge is performed on a shard instead of on mongos.
        function checkSortOrder(sortSpec, collation, expectedVals) {
            const wantDocs = expectedVals.map(v => ({val: v}));
            const failureMsg = "sort returned wrong order with sort pattern " + tojson(sortSpec) +
                " and collation " + tojson(collation);

            const plainPipeline = [{$sort: sortSpec}, {$project: {_id: 0, val: 1}}];
            const plainCursor = coll.aggregate(plainPipeline, {collation: collation});
            assert.eq(plainCursor.toArray(), wantDocs, failureMsg);

            const shardMergePipeline = [
                {$_internalSplitPipeline: {mergeType: "anyShard"}},
                {$sort: sortSpec},
                {$project: {_id: 0, val: 1}}
            ];
            const shardMergeCursor = coll.aggregate(shardMergePipeline, {collation: collation});
            assert.eq(
                shardMergeCursor.toArray(), wantDocs, failureMsg + " when merging on a shard");
        }

        // Sort on a single key first.
        const singleKey = {kana: 1};

        // Binary collation: hiragana codepoints sort before katakana codepoints.
        checkSortOrder(singleKey, simpleCollation, [5, 6, 7, 8, 9, 0, 1, 2, 3, 4]);

        // Japanese collation at strength 4: a hiragana codepoint always sorts before its
        // equivalent katakana.
        checkSortOrder(singleKey, jaCollationStr4, [5, 0, 6, 1, 7, 2, 8, 3, 9, 4]);

        // Then sort on a compound key.
        const compoundKey = {kana: 1, val: 1};

        // Binary collation: hiragana codepoints sort before katakana codepoints.
        checkSortOrder(compoundKey, simpleCollation, [5, 6, 7, 8, 9, 0, 1, 2, 3, 4]);

        // Default Japanese collation: hiragana and katakana with the same pronunciation sort
        // together but in no specified order, so the secondary sort on "val" breaks the tie and
        // puts the katakana first.
        checkSortOrder(compoundKey, jaCollationStr3, [0, 5, 1, 6, 2, 7, 3, 8, 4, 9]);

        // Japanese collation at strength 4: a hiragana codepoint always sorts before its
        // equivalent katakana.
        checkSortOrder(compoundKey, jaCollationStr4, [5, 0, 6, 1, 7, 2, 8, 3, 9, 4]);
    }

    // Returns a shallow copy of 'doc' whose "kana" value is replaced by transformKana(kana). The
    // copy keeps the shared 'data' documents unmodified between scenarios.
    function withKana(doc, transformKana) {
        const copy = Object.extend({}, doc);
        copy.kana = transformKana(copy.kana);
        return copy;
    }

    // Wraps 'kana' in a singleton array on roughly half of the calls and returns it unchanged
    // otherwise. The draw comes from the shell's Random helper rather than Math.random(), so the
    // mix of scalar and array values is governed by the generator that Random.setRandomSeed()
    // initializes at the top of this test.
    function maybeWrapInArray(kana) {
        return Random.rand() < 0.5 ? [kana] : kana;
    }

    // Test sorting documents with only scalar values. The result of this first drop is left
    // unasserted, unlike the later ones (presumably because the collection may not exist yet).
    coll.drop();
    runTests(data);

    // Test sorting documents containing singleton arrays.
    assert(coll.drop());
    runTests(data.map(doc => withKana(doc, kana => [kana])));

    // Test sorting documents containing arrays with multiple elements.
    assert(coll.drop());
    runTests(data.map(doc => withKana(doc, kana => [kana, kana, kana])));

    // Test sorting documents where some values are scalars and others are arrays.
    assert(coll.drop());
    runTests(data.map(doc => withKana(doc, maybeWrapInArray)));

    // Create indexes that provide sorts and assert that the results are equivalent.
    assert(coll.drop());
    assert.commandWorked(
        coll.createIndex({kana: 1}, {name: "k1_jaStr3", collation: jaCollationStr3}));
    assert.commandWorked(
        coll.createIndex({kana: 1}, {name: "k1_jaStr4", collation: jaCollationStr4}));
    assert.commandWorked(
        coll.createIndex({kana: 1, val: 1}, {name: "k1v1_jaStr3", collation: jaCollationStr3}));
    assert.commandWorked(
        coll.createIndex({kana: 1, val: 1}, {name: "k1v1_jaStr4", collation: jaCollationStr4}));
    runTests(data.map(doc => withKana(doc, maybeWrapInArray)));
}());