SERVER-43816 Push $text and $meta:"textScore" sort into the PlanStage layer.

This change involves unifying the behavior of find and agg for validation of $meta:"textScore". In particular, find operations no longer require a "textScore" $meta projection in order to specify a "textScore" $meta sort. This brings find into alignment with agg, which never had such a restriction. It is also now legal for a find command to sort on the field overridden by a $meta:"textScore" projection without specifying the $meta operator in the sort pattern.

In addition:
- Tightens validation around uses of "textScore" $meta projections for queries that do not specify a $text predicate.
- Fixes a bug in which $natural sort/hint were not correctly validated.
author: David Storch <david.storch@mongodb.com> 2019-12-18 03:28:25 +0000
committer: evergreen <evergreen@mongodb.com> 2019-12-18 03:28:25 +0000
commit: 1b75fcdcaf9c320ef8b916204d1eb51c4bab8a5f (patch)
tree: 237bb5cddc47f6918992d10f343ede42acf204e0 /jstests
parent: e83c51c61fa9907459e1cfac9e4bb3517445612f (diff)
download: mongo-1b75fcdcaf9c320ef8b916204d1eb51c4bab8a5f.tar.gz
8 files changed, 197 insertions, 87 deletions
diff --git a/jstests/aggregation/optimize_away_pipeline.js b/jstests/aggregation/optimize_away_pipeline.js
index 194ed5201ce..ab4be0995aa 100644
--- a/jstests/aggregation/optimize_away_pipeline.js
+++ b/jstests/aggregation/optimize_away_pipeline.js
@@ -231,12 +231,12 @@ assertPipelineUsesAggregation({
     expectedResult: [{_id: "null", s: 50}]
 });
 
-// TODO SERVER-40253: We cannot optimize away text search queries.
+// Test that we can optimize away a pipeline with a $text search predicate.
 assert.commandWorked(coll.createIndex({y: "text"}));
-assertPipelineUsesAggregation(
+assertPipelineDoesNotUseAggregation(
     {pipeline: [{$match: {$text: {$search: "abc"}}}], expectedStages: ["IXSCAN"]});
 // Test that $match, $sort, and $project all get answered by the PlanStage layer for a $text query.
-assertPipelineUsesAggregation({
+assertPipelineDoesNotUseAggregation({
     pipeline:
         [{$match: {$text: {$search: "abc"}}}, {$sort: {sortField: 1}}, {$project: {a: 1, b: 1}}],
     expectedStages: ["TEXT", "SORT", "PROJECTION_SIMPLE"],
diff --git a/jstests/aggregation/use_query_sort.js b/jstests/aggregation/use_query_sort.js
index ec14625e856..c2fb0e3feb6 100644
--- a/jstests/aggregation/use_query_sort.js
+++ b/jstests/aggregation/use_query_sort.js
@@ -76,11 +76,13 @@ assertHasBlockingQuerySort([{$match: {_id: {$gte: 90}}}, {$sort: {x: 1}}], true)
 // A query of the same shape will use a non-blocking plan if the predicate is not selective.
 assertHasNonBlockingQuerySort([{$match: {_id: {$gte: 0}}}, {$sort: {x: 1}}], true);
 
-// Meta-sort on "textScore" currently cannot be pushed down into the query layer. See SERVER-43816.
+// Verify that meta-sort on "textScore" can be pushed down into the query layer.
 assert.commandWorked(coll.createIndex({x: "text"}));
-assertDoesNotHaveQuerySort(
+assertHasBlockingQuerySort(
     [{$match: {$text: {$search: "test"}}}, {$sort: {key: {$meta: "textScore"}}}], false);
 
-// Meta-sort on "randVal" cannot be pushed into the query layer. See SERVER-43816.
-assertDoesNotHaveQuerySort([{$sort: {key: {$meta: "randVal"}}}], false);
+// Verify that meta-sort on "randVal" can be pushed into the query layer. Although "randVal" $meta
+// sort is currently a supported way to randomize the order of the data, it shouldn't preclude
+// pushdown of the sort into the plan stage layer.
+assertHasBlockingQuerySort([{$sort: {key: {$meta: "randVal"}}}], false);
 }());
diff --git a/jstests/core/fts_proj.js b/jstests/core/fts_proj.js
index b59c02cc293..2834b4d03eb 100644
--- a/jstests/core/fts_proj.js
+++ b/jstests/core/fts_proj.js
@@ -1,16 +1,35 @@
-t = db.text_proj;
-t.drop();
+/**
+ * Projection tests for FTS queries.
+ *
+ * Requires all nodes to be binary version 4.4, since this file includes tests for additional
+ * validation of the query added in 4.4.
+ * @tags: [requires_fcv_44]
+ */
+(function() {
+"use strict";
 
-t.save({_id: 1, x: "a", y: "b", z: "c"});
-t.save({_id: 2, x: "d", y: "e", z: "f"});
-t.save({_id: 3, x: "a", y: "g", z: "h"});
+const kUnavailableMetadataErrCode = 40218;
 
-t.ensureIndex({x: "text"}, {default_language: "none"});
+const coll = db.text_proj;
+coll.drop();
 
-res = t.find({"$text": {"$search": "a"}});
-assert.eq(2, res.length());
-assert(res[0].y, tojson(res.toArray()));
+assert.commandWorked(coll.insert({_id: 1, x: "a", y: "b", z: "c"}));
+assert.commandWorked(coll.insert({_id: 2, x: "d", y: "e", z: "f"}));
+assert.commandWorked(coll.insert({_id: 3, x: "a", y: "g", z: "h"}));
 
-res = t.find({"$text": {"$search": "a"}}, {x: 1});
-assert.eq(2, res.length());
-assert(!res[0].y, tojson(res.toArray()));
+assert.commandWorked(coll.createIndex({x: "text"}, {default_language: "none"}));
+
+let res = coll.find({"$text": {"$search": "a"}}).toArray();
+assert.eq(2, res.length, res);
+assert(res[0].y, res);
+
+res = coll.find({"$text": {"$search": "a"}}, {x: 1}).toArray();
+assert.eq(2, res.length, res);
+assert(!res[0].y, res);
+
+// Text score $meta projection fails if there is no $text predicate, for both find and agg.
+let error = assert.throws(() => coll.find({}, {score: {$meta: "textScore"}}).itcount());
+assert.commandFailedWithCode(error, kUnavailableMetadataErrCode);
+error = assert.throws(() => coll.aggregate([{$project: {score: {$meta: "textScore"}}}]).itcount());
+assert.commandFailedWithCode(error, kUnavailableMetadataErrCode);
+}());
diff --git a/jstests/core/fts_projection.js b/jstests/core/fts_projection.js
index af48ca8a12d..5ef192203f2 100644
--- a/jstests/core/fts_projection.js
+++ b/jstests/core/fts_projection.js
@@ -8,10 +8,10 @@ load("jstests/libs/analyze_plan.js");
 var t = db.getSiblingDB("test").getCollection("fts_projection");
 t.drop();
 
-t.insert({_id: 0, a: "textual content"});
-t.insert({_id: 1, a: "additional content", b: -1});
-t.insert({_id: 2, a: "irrelevant content"});
-t.ensureIndex({a: "text"});
+assert.commandWorked(t.insert({_id: 0, a: "textual content"}));
+assert.commandWorked(t.insert({_id: 1, a: "additional content", b: -1}));
+assert.commandWorked(t.insert({_id: 2, a: "irrelevant content"}));
+assert.commandWorked(t.createIndex({a: "text"}));
 
 // Project the text score.
 var results = t.find({$text: {$search: "textual content -irrelevant"}}, {
@@ -69,9 +69,6 @@ for (var i = 0; i < results.length; ++i) {
 
 assert.neq(-1, results[0].b);
 
-// Don't crash if we have no text score.
-var results = t.find({a: /text/}, {score: {$meta: "textScore"}}).toArray();
-
 // SERVER-12173
 // When $text operator is in $or, should evaluate first
 results = t.find({$or: [{$text: {$search: "textual content -irrelevant"}}, {_id: 1}]}, {
@@ -122,13 +119,4 @@ assert(results[0].score,
        "invalid text score for " + tojson(results[0], '', true) + " when $text is in $or");
 assert(results[1].score,
        "invalid text score for " + tojson(results[0], '', true) + " when $text is in $or");
-
-// Project a text score when the query has no $text component. The behavior of $meta expression is
-// to evaluate to "missing" when the metadata is absent, so there should be no "score" field in the
-// output documents.
-results = t.find({}, {score: {$meta: "textScore"}}).toArray();
-assert.eq(results.length, 3);
-for (let res of results) {
-    assert(!res.hasOwnProperty("score"));
-}
 })();
diff --git a/jstests/core/fts_score_sort.js b/jstests/core/fts_score_sort.js
index 4b7189c1a4c..2a45d441677 100644
--- a/jstests/core/fts_score_sort.js
+++ b/jstests/core/fts_score_sort.js
@@ -1,61 +1,127 @@
 // Test sorting with text score metadata.
+//
+// Includes tests for changes to validation around "textScore" $meta that were introduced in 4.4.
+// @tags: [requires_fcv_44]
 (function() {
 "use strict";
 
-var t = db.getSiblingDB("test").getCollection("fts_score_sort");
-t.drop();
+const kUnavailableMetadataErrCode = 40218;
 
-assert.commandWorked(t.insert({_id: 0, a: "textual content"}));
-assert.commandWorked(t.insert({_id: 1, a: "additional content"}));
-assert.commandWorked(t.insert({_id: 2, a: "irrelevant content"}));
-assert.commandWorked(t.ensureIndex({a: "text"}));
+const coll = db.getSiblingDB("test").getCollection("fts_score_sort");
+coll.drop();
+
+assert.commandWorked(coll.insert({_id: 0, a: "textual content"}));
+assert.commandWorked(coll.insert({_id: 1, a: "additional content"}));
+assert.commandWorked(coll.insert({_id: 2, a: "irrelevant content"}));
+assert.commandWorked(coll.createIndex({a: "text"}));
 
 // $meta sort specification should be rejected if it has additional keys.
 assert.throws(function() {
-    t.find({$text: {$search: "textual content"}}, {score: {$meta: "textScore"}})
+    coll.find({$text: {$search: "textual content"}}, {score: {$meta: "textScore"}})
         .sort({score: {$meta: "textScore", extra: 1}})
         .itcount();
 });
 
 // $meta sort specification should be rejected if the type of meta sort is not known.
 assert.throws(function() {
-    t.find({$text: {$search: "textual content"}}, {score: {$meta: "textScore"}})
+    coll.find({$text: {$search: "textual content"}}, {score: {$meta: "textScore"}})
         .sort({score: {$meta: "unknown"}})
         .itcount();
 });
 
 // Sort spefication should be rejected if a $-keyword other than $meta is used.
 assert.throws(function() {
-    t.find({$text: {$search: "textual content"}}, {score: {$meta: "textScore"}})
+    coll.find({$text: {$search: "textual content"}}, {score: {$meta: "textScore"}})
         .sort({score: {$notMeta: "textScore"}})
         .itcount();
 });
 
 // Sort spefication should be rejected if it is a string, not an object with $meta.
 assert.throws(function() {
-    t.find({$text: {$search: "textual content"}}, {score: {$meta: "textScore"}})
+    coll.find({$text: {$search: "textual content"}}, {score: {$meta: "textScore"}})
         .sort({score: "textScore"})
         .itcount();
 });
 
 // Sort by the text score.
-var results =
-    t.find({$text: {$search: "textual content -irrelevant"}}, {score: {$meta: "textScore"}})
+let results =
+    coll.find({$text: {$search: "textual content -irrelevant"}}, {score: {$meta: "textScore"}})
         .sort({score: {$meta: "textScore"}})
         .toArray();
-assert.eq(results.length, 2);
-assert.eq(results[0]._id, 0);
-assert.eq(results[1]._id, 1);
-assert.gt(results[0].score, results[1].score);
+assert.eq(
+    results,
+    [{_id: 0, a: "textual content", score: 1.5}, {_id: 1, a: "additional content", score: 0.75}]);
 
 // Sort by {_id descending, score} and verify the order is right.
-var results =
-    t.find({$text: {$search: "textual content -irrelevant"}}, {score: {$meta: "textScore"}})
+results =
+    coll.find({$text: {$search: "textual content -irrelevant"}}, {score: {$meta: "textScore"}})
         .sort({_id: -1, score: {$meta: "textScore"}})
         .toArray();
-assert.eq(results.length, 2);
-assert.eq(results[0]._id, 1);
-assert.eq(results[1]._id, 0);
-// Note the reversal from above.
-assert.lt(results[0].score, results[1].score);
+assert.eq(
+    results,
+    [{_id: 1, a: "additional content", score: 0.75}, {_id: 0, a: "textual content", score: 1.5}]);
+
+// Can $meta sort by text score without a meta projection using either find or agg.
+let expectedResults = [{_id: 0, a: "textual content"}, {_id: 1, a: "additional content"}];
+results = coll.find({$text: {$search: "textual content -irrelevant"}})
+              .sort({score: {$meta: "textScore"}})
+              .toArray();
+assert.eq(results, expectedResults);
+results = coll.aggregate([
+                  {$match: {$text: {$search: "textual content -irrelevant"}}},
+                  {$sort: {score: {$meta: "textScore"}}}
+              ])
+              .toArray();
+assert.eq(results, expectedResults);
+
+// $meta-sort by text score fails if there is no $text predicate, for both find and agg.
+let error = assert.throws(() => coll.find().sort({score: {$meta: "textScore"}}).itcount());
+assert.commandFailedWithCode(error, kUnavailableMetadataErrCode);
+error = assert.throws(() => coll.aggregate([{$sort: {score: {$meta: "textScore"}}}]).itcount());
+assert.commandFailedWithCode(error, kUnavailableMetadataErrCode);
+
+// Test a sort pattern like {<field>: {$meta: "textScore"}} is legal even if <field> is explicitly
+// included by the projection. Test that this is true for both the find and aggregate commands.
+expectedResults = [{a: "textual content"}, {a: "additional content"}];
+results = coll.find({$text: {$search: "textual content -irrelevant"}}, {_id: 0, a: 1})
+              .sort({a: {$meta: "textScore"}})
+              .toArray();
+assert.eq(results, expectedResults);
+results = coll.aggregate([
+                  {$match: {$text: {$search: "textual content -irrelevant"}}},
+                  {$project: {_id: 0, a: 1}},
+                  {$sort: {a: {$meta: "textScore"}}}
+              ])
+              .toArray();
+assert.eq(results, expectedResults);
+
+// Test that both find and agg can $meta-project the textScore with a non-$meta sort on the same
+// field. The semantics of find are that the sort logically occurs before the projection, so we
+// expect the data to be sorted according to the values that were present prior to the $meta
+// projection.
+expectedResults = [{_id: 0, a: 0.75}, {_id: 1, a: 1.5}];
+results = coll.find({$text: {$search: "additional content -irrelevant"}},
+                    {_id: 1, a: {$meta: "textScore"}})
+              .sort({a: -1})
+              .toArray();
+assert.eq(results, expectedResults);
+results = coll.aggregate([
+                  {$match: {$text: {$search: "additional content -irrelevant"}}},
+                  {$sort: {a: -1}},
+                  {$project: {_id: 1, a: {$meta: "textScore"}}}
+              ])
+              .toArray();
+assert.eq(results, expectedResults);
+
+// Test that an aggregate command with a $project-then-$sort pipeline can sort on the
+// $meta-projected data without repeating the $meta operator in the $sort.
+results = coll.aggregate([
+                  {$match: {$text: {$search: "textual content -irrelevant"}}},
+                  {$project: {a: 1, score: {$meta: "textScore"}}},
+                  {$sort: {score: -1}}
+              ])
+              .toArray();
+assert.eq(
+    results,
+    [{_id: 0, a: "textual content", score: 1.5}, {_id: 1, a: "additional content", score: 0.75}]);
 }());
diff --git a/jstests/core/natural.js b/jstests/core/natural.js
index fa7a511eda1..75ede89e648 100644
--- a/jstests/core/natural.js
+++ b/jstests/core/natural.js
@@ -1,4 +1,7 @@
 // Tests for $natural sort and $natural hint.
+//
+// Includes tests for improved validation of $natural sort/hint added in 4.4.
+// @tags: [requires_fcv_44]
 (function() {
 'use strict';
 
@@ -7,7 +10,7 @@ var results;
 var coll = db.jstests_natural;
 coll.drop();
 
-assert.commandWorked(coll.ensureIndex({a: 1}));
+assert.commandWorked(coll.createIndex({a: 1}));
 assert.commandWorked(coll.insert({_id: 1, a: 3}));
 assert.commandWorked(coll.insert({_id: 2, a: 2}));
 assert.commandWorked(coll.insert({_id: 3, a: 1}));
@@ -23,4 +26,21 @@ assert.eq(results[0], {_id: 2, a: 2});
 results = coll.find({a: 2}).hint({$natural: -1}).toArray();
 assert.eq(results.length, 1);
 assert.eq(results[0], {_id: 2, a: 2});
+
+// $natural hint with non-$natural sort is allowed.
+assert.eq([{_id: 3, a: 1}, {_id: 2, a: 2}, {_id: 1, a: 3}],
+          coll.find().hint({$natural: 1}).sort({a: 1}).toArray());
+
+// $natural sort with non-$natural hint is not allowed.
+assert.throws(() => coll.find().hint({a: 1}).sort({$natural: 1}).itcount());
+
+// Test that a compound $natural hint is not allowed.
+assert.throws(() => coll.find().hint({a: 1, $natural: 1}).itcount());
+assert.throws(() => coll.find().hint({$natural: 1, b: 1}).itcount());
+assert.throws(() => coll.find().hint({a: 1, $natural: 1, b: 1}).itcount());
+
+// Test that a compound $natural sort is not allowed.
+assert.throws(() => coll.find().sort({a: 1, $natural: 1}).itcount());
+assert.throws(() => coll.find().sort({$natural: 1, b: 1}).itcount());
+assert.throws(() => coll.find().sort({a: 1, $natural: 1, b: 1}).itcount());
 })();
diff --git a/jstests/core/views/views_find.js b/jstests/core/views/views_find.js
index 1468870d40e..23c6203831b 100644
--- a/jstests/core/views/views_find.js
+++ b/jstests/core/views/views_find.js
@@ -1,6 +1,13 @@
 /**
  * Tests the find command on views.
- * @tags: [requires_find_command, requires_getmore]
+ *
+ * @tags: [
+ *   requires_find_command,
+ *   requires_getmore,
+ *   # Includes tests for allowing $natural sort against a view, but support for this feature
+ *   # requires all nodes to be binary version 4.4.
+ *   requires_fcv_44,
+ * ]
  */
 (function() {
 "use strict";
@@ -62,8 +69,13 @@ assertFindResultEq({find: "identityView", sort: {_id: 1}}, allDocuments, doOrder
 assertFindResultEq(
     {find: "identityView", limit: 1, batchSize: 1, sort: {_id: 1}, projection: {_id: 1}},
     [{_id: "New York"}]);
-assert.commandFailedWithCode(viewsDB.runCommand({find: "identityView", sort: {$natural: 1}}),
-                             ErrorCodes.InvalidPipelineOperator);
+
+// $natural sort against a view is permitted, since it has the same meaning as $natural hint.
+// Likewise, $natural hint against a view is permitted.
+assertFindResultEq({find: "identityView", filter: {state: "NY"}, sort: {$natural: 1}},
+                   [{_id: "New York", state: "NY", pop: 7}]);
+assertFindResultEq({find: "identityView", filter: {state: "NY"}, hint: {$natural: 1}},
+                   [{_id: "New York", state: "NY", pop: 7}]);
 
 // Negative batch size and limit should fail.
 assert.commandFailed(viewsDB.runCommand({find: "identityView", batchSize: -1}));
diff --git a/jstests/sharding/fts_score_sort_sharded.js b/jstests/sharding/fts_score_sort_sharded.js
index a6447cb4b38..fbeec5d8497 100644
--- a/jstests/sharding/fts_score_sort_sharded.js
+++ b/jstests/sharding/fts_score_sort_sharded.js
@@ -1,5 +1,9 @@
 // Test that queries with a sort on text metadata return results in the correct order in a sharded
 // collection.
+//
+// Require all nodes to be 4.4, since validation around $meta:"textScore" was relaxed in that
+// version, and the new behavior is tested here.
+// @tags: [requires_fcv_44]
 
 var st = new ShardingTest({shards: 2});
 st.stopBalancer();
@@ -31,7 +35,7 @@ assert.commandWorked(coll.ensureIndex({a: "text"}));
 //
 // Execute query with sort on document score, verify results are in correct order.
 //
-var results = coll.find({$text: {$search: "pizza"}}, {s: {$meta: "textScore"}})
+let results = coll.find({$text: {$search: "pizza"}}, {s: {$meta: "textScore"}})
                   .sort({s: {$meta: "textScore"}})
                   .toArray();
 assert.eq(results.length, 4, results);
@@ -40,33 +44,32 @@ assert.eq(results[1]._id, 2, results);
 assert.eq(results[2]._id, -1, results);
 assert.eq(results[3]._id, 1, results);
 
-//
-// Verify that mongos requires the text metadata sort to be specified in the projection.
-//
-
 // Projection not specified at all.
-cursor = coll.find({$text: {$search: "pizza"}}).sort({s: {$meta: "textScore"}});
-assert.throws(function() {
-    cursor.next();
-});
+results = coll.find({$text: {$search: "pizza"}}).sort({s: {$meta: "textScore"}}).toArray();
+assert.eq(results, [
+    {_id: -2, a: "pizza pizza pizza pizza"},
+    {_id: 2, a: "pizza pizza pizza"},
+    {_id: -1, a: "pizza pizza"},
+    {_id: 1, a: "pizza"}
+]);
+
+// Projection and sort specified with different field names.
+results = coll.find({$text: {$search: "pizza"}}, {t: {$meta: "textScore"}})
+              .sort({s: {$meta: "textScore"}})
+              .toArray();
+assert.eq(results, [
+    {_id: -2, a: "pizza pizza pizza pizza", t: 1.875},
+    {_id: 2, a: "pizza pizza pizza", t: 1.75},
+    {_id: -1, a: "pizza pizza", t: 1.5},
+    {_id: 1, a: "pizza", t: 1.1}
+]);
 
-// Projection specified with incorrect field name.
-cursor = coll.find({$text: {$search: "pizza"}}, {t: {$meta: "textScore"}}).sort({
-    s: {$meta: "textScore"}
-});
-assert.throws(function() {
-    cursor.next();
-});
+// $meta-sort on the same field name that is included in the projection without the $meta operator.
+results = coll.find({$text: {$search: "pizza"}}, {s: 1}).sort({s: {$meta: "textScore"}}).toArray();
+assert.eq(results, [{_id: -2}, {_id: 2}, {_id: -1}, {_id: 1}]);
 
-// Projection specified on correct field but with wrong sort.
-cursor = coll.find({$text: {$search: "pizza"}}, {s: 1}).sort({s: {$meta: "textScore"}});
-assert.throws(function() {
-    cursor.next();
-});
-cursor = coll.find({$text: {$search: "pizza"}}, {s: -1}).sort({s: {$meta: "textScore"}});
-assert.throws(function() {
-    cursor.next();
-});
+results = coll.find({$text: {$search: "pizza"}}, {s: -1}).sort({s: {$meta: "textScore"}}).toArray();
+assert.eq(results, [{_id: -2}, {_id: 2}, {_id: -1}, {_id: 1}]);
 
 //
 // Execute query with a compound sort that includes the text score along with a multikey field.
author	David Storch <david.storch@mongodb.com>	2019-12-18 03:28:25 +0000
committer	evergreen <evergreen@mongodb.com>	2019-12-18 03:28:25 +0000
commit	1b75fcdcaf9c320ef8b916204d1eb51c4bab8a5f (patch)
tree	237bb5cddc47f6918992d10f343ede42acf204e0 /jstests
parent	e83c51c61fa9907459e1cfac9e4bb3517445612f (diff)
download	mongo-1b75fcdcaf9c320ef8b916204d1eb51c4bab8a5f.tar.gz