summary refs log tree commit diff
diff options
context:
space:
mode:
author Ian Boros <puppyofkosh@gmail.com> 2019-05-15 19:05:00 -0400
committer Ian Boros <puppyofkosh@gmail.com> 2019-05-20 21:38:07 -0400
commit d7c2ad76b418912c16d4250bdbe74f8a9b3f51ca (patch)
tree 4ff2622d3a38fbf67d73f848e2deaf3c6c5e2cc5
parent cb7eff01f675d854d7484bd68ce298a8817d2af2 (diff)
download mongo-d7c2ad76b418912c16d4250bdbe74f8a9b3f51ca.tar.gz
SERVER-40555 add searchSnippet metadata
-rw-r--r-- jstests/core/find_projection_meta_errors.js 4
-rw-r--r-- jstests/sharding/aggregates_during_balancing.js 4
-rw-r--r-- src/mongo/db/pipeline/document.cpp 43
-rw-r--r-- src/mongo/db/pipeline/document.h 23
-rw-r--r-- src/mongo/db/pipeline/document_internal.h 24
-rw-r--r-- src/mongo/db/pipeline/document_value_test.cpp 96
-rw-r--r-- src/mongo/db/pipeline/expression.cpp 10
-rw-r--r-- src/mongo/db/pipeline/expression.h 1
-rw-r--r-- src/mongo/db/pipeline/expression_test.cpp 28
9 files changed, 225 insertions, 8 deletions
diff --git a/jstests/core/find_projection_meta_errors.js b/jstests/core/find_projection_meta_errors.js
index bbf91801ee0..6fd69cb9d04 100644
--- a/jstests/core/find_projection_meta_errors.js
+++ b/jstests/core/find_projection_meta_errors.js
@@ -14,6 +14,10 @@
ErrorCodes.BadValue);
assert.commandFailedWithCode(
+ db.runCommand({find: coll.getName(), projection: {score: {$meta: "searchHighlights"}}}),
+ ErrorCodes.BadValue);
+
+ assert.commandFailedWithCode(
db.runCommand({find: coll.getName(), projection: {score: {$meta: "some garbage"}}}),
ErrorCodes.BadValue);
}());
diff --git a/jstests/sharding/aggregates_during_balancing.js b/jstests/sharding/aggregates_during_balancing.js
index 149f6031583..003674e1530 100644
--- a/jstests/sharding/aggregates_during_balancing.js
+++ b/jstests/sharding/aggregates_during_balancing.js
@@ -179,7 +179,9 @@
function testSample() {
jsTestLog('testing $sample');
[0, 1, 10, nItems, nItems + 1].forEach(function(size) {
- var res = db.ts1.aggregate([{$sample: {size: size}}]).toArray();
+ // Run with 'allowDiskUse' set to true because this may exceed the in-memory sort
+ // limit.
+ var res = db.ts1.aggregate([{$sample: {size: size}}], {allowDiskUse: true}).toArray();
assert.eq(res.length, Math.min(nItems, size));
});
}
diff --git a/src/mongo/db/pipeline/document.cpp b/src/mongo/db/pipeline/document.cpp
index e7d74aadbfe..a4c17d92501 100644
--- a/src/mongo/db/pipeline/document.cpp
+++ b/src/mongo/db/pipeline/document.cpp
@@ -45,12 +45,14 @@ using std::vector;
const DocumentStorage DocumentStorage::kEmptyDoc;
-const std::vector<StringData> Document::allMetadataFieldNames = {Document::metaFieldTextScore,
- Document::metaFieldRandVal,
- Document::metaFieldSortKey,
- Document::metaFieldGeoNearDistance,
- Document::metaFieldGeoNearPoint,
- Document::metaFieldSearchScore};
+const std::vector<StringData> Document::allMetadataFieldNames = {
+ Document::metaFieldGeoNearDistance,
+ Document::metaFieldGeoNearPoint,
+ Document::metaFieldRandVal,
+ Document::metaFieldSearchHighlights,
+ Document::metaFieldSearchScore,
+ Document::metaFieldSortKey,
+ Document::metaFieldTextScore};
Position DocumentStorage::findField(StringData requested) const {
int reqSize = requested.size(); // get size calculation out of the way if needed
@@ -226,11 +228,24 @@ intrusive_ptr<DocumentStorage> DocumentStorage::clone() const {
out->_geoNearDistance = _geoNearDistance;
out->_geoNearPoint = _geoNearPoint.getOwned();
out->_searchScore = _searchScore;
+ out->_searchHighlights = _searchHighlights;
}
return out;
}
+size_t DocumentStorage::getMetadataApproximateSize() const {
+ size_t size = 0;
+ size += sizeof(_textScore);
+ size += sizeof(_randVal);
+ size += _sortKey.objsize();
+ size += sizeof(_geoNearDistance);
+ size += _geoNearPoint.getApproximateSize();
+ size += sizeof(_searchScore);
+ size += _searchHighlights.getApproximateSize();
+ return size;
+}
+
DocumentStorage::~DocumentStorage() {
std::unique_ptr<char[]> deleteBufferAtScopeEnd(_buffer);
@@ -292,6 +307,7 @@ constexpr StringData Document::metaFieldSortKey;
constexpr StringData Document::metaFieldGeoNearDistance;
constexpr StringData Document::metaFieldGeoNearPoint;
constexpr StringData Document::metaFieldSearchScore;
+constexpr StringData Document::metaFieldSearchHighlights;
BSONObj Document::toBsonWithMetaData() const {
BSONObjBuilder bb;
@@ -308,6 +324,8 @@ BSONObj Document::toBsonWithMetaData() const {
getGeoNearPoint().addToBsonObj(&bb, metaFieldGeoNearPoint);
if (hasSearchScore())
bb.append(metaFieldSearchScore, getSearchScore());
+ if (hasSearchHighlights())
+ getSearchHighlights().addToBsonObj(&bb, metaFieldSearchHighlights);
return bb.obj();
}
@@ -325,6 +343,9 @@ Document Document::fromBsonWithMetaData(const BSONObj& bson) {
} else if (fieldName == metaFieldSearchScore) {
md.setSearchScore(elem.Double());
continue;
+ } else if (fieldName == metaFieldSearchHighlights) {
+ md.setSearchHighlights(Value(elem));
+ continue;
} else if (fieldName == metaFieldRandVal) {
md.setRandMetaField(elem.Double());
continue;
@@ -431,6 +452,9 @@ size_t Document::getApproximateSize() const {
size -= sizeof(Value); // already accounted for above
}
+ // The metadata also occupies space in the document storage that's pre-allocated.
+ size += getMetadataApproximateSize();
+
return size;
}
@@ -526,6 +550,10 @@ void Document::serializeForSorter(BufBuilder& buf) const {
buf.appendNum(char(DocumentStorage::MetaType::SEARCH_SCORE + 1));
buf.appendNum(getSearchScore());
}
+ if (hasSearchHighlights()) {
+ buf.appendNum(char(DocumentStorage::MetaType::SEARCH_HIGHLIGHTS + 1));
+ getSearchHighlights().serializeForSorter(buf);
+ }
buf.appendNum(char(0));
}
@@ -547,6 +575,9 @@ Document Document::deserializeForSorter(BufReader& buf, const SorterDeserializeS
BSONObj::deserializeForSorter(buf, BSONObj::SorterDeserializeSettings()));
} else if (marker == char(DocumentStorage::MetaType::SEARCH_SCORE) + 1) {
doc.setSearchScore(buf.read<LittleEndian<double>>());
+ } else if (marker == char(DocumentStorage::MetaType::SEARCH_HIGHLIGHTS) + 1) {
+ doc.setSearchHighlights(
+ Value::deserializeForSorter(buf, Value::SorterDeserializeSettings()));
} else {
uasserted(28744, "Unrecognized marker, unable to deserialize buffer");
}
diff --git a/src/mongo/db/pipeline/document.h b/src/mongo/db/pipeline/document.h
index 6d765a24c65..f465ecce0e2 100644
--- a/src/mongo/db/pipeline/document.h
+++ b/src/mongo/db/pipeline/document.h
@@ -97,6 +97,7 @@ public:
static constexpr StringData metaFieldGeoNearDistance = "$dis"_sd;
static constexpr StringData metaFieldGeoNearPoint = "$pt"_sd;
static constexpr StringData metaFieldSearchScore = "$searchScore"_sd;
+ static constexpr StringData metaFieldSearchHighlights = "$searchHighlights"_sd;
static const std::vector<StringData> allMetadataFieldNames;
@@ -160,10 +161,21 @@ public:
/** Get the approximate storage size of the document and sub-values in bytes.
* Note: Some memory may be shared with other Documents or between fields within
* a single Document so this can overestimate usage.
+ *
+ * Note: the value returned by this function includes the size of the metadata associated with
+ * the document.
*/
size_t getApproximateSize() const;
/**
+ * Return the approximate amount of space used by metadata. Note that documents may reserve
+ * space for metadata even if no metadata is used.
+ */
+ size_t getMetadataApproximateSize() const {
+ return storage().getMetadataApproximateSize();
+ }
+
+ /**
* Compare two documents. Most callers should prefer using DocumentComparator instead. See
* document_comparator.h for details.
*
@@ -291,6 +303,13 @@ public:
return storage().getSearchScore();
}
+ bool hasSearchHighlights() const {
+ return storage().hasSearchHighlights();
+ }
+ Value getSearchHighlights() const {
+ return storage().getSearchHighlights();
+ }
+
/// members for Sorter
struct SorterDeserializeSettings {}; // unused
void serializeForSorter(BufBuilder& buf) const;
@@ -554,6 +573,10 @@ public:
storage().setSearchScore(score);
}
+ void setSearchHighlights(Value highlights) {
+ storage().setSearchHighlights(highlights);
+ }
+
/** Convert to a read-only document and release reference.
*
* Call this to indicate that you are done with this Document and will
diff --git a/src/mongo/db/pipeline/document_internal.h b/src/mongo/db/pipeline/document_internal.h
index 71b28d2aeb8..1d2b1b58951 100644
--- a/src/mongo/db/pipeline/document_internal.h
+++ b/src/mongo/db/pipeline/document_internal.h
@@ -204,6 +204,7 @@ public:
GEONEAR_DIST,
GEONEAR_POINT,
SEARCH_SCORE,
+ SEARCH_HIGHLIGHTS,
// New fields must be added before the NUM_FIELDS sentinel.
NUM_FIELDS
@@ -279,6 +280,12 @@ public:
}
/**
+ * Compute the space allocated for the metadata fields. Will account for space allocated for
+ * unused metadata fields as well.
+ */
+ size_t getMetadataApproximateSize() const;
+
+ /**
* Copies all metadata from source if it has any.
* Note: does not clear metadata from this.
*/
@@ -301,6 +308,9 @@ public:
if (source.hasSearchScore()) {
setSearchScore(source.getSearchScore());
}
+ if (source.hasSearchHighlights()) {
+ setSearchHighlights(source.getSearchHighlights());
+ }
}
bool hasTextScore() const {
@@ -369,6 +379,17 @@ public:
_searchScore = score;
}
+ bool hasSearchHighlights() const {
+ return _metaFields.test(MetaType::SEARCH_HIGHLIGHTS);
+ }
+ Value getSearchHighlights() const {
+ return _searchHighlights;
+ }
+ void setSearchHighlights(Value highlights) {
+ _metaFields.set(MetaType::SEARCH_HIGHLIGHTS);
+ _searchHighlights = highlights;
+ }
+
private:
/// Same as lastElement->next() or firstElement() if empty.
const ValueElement* end() const {
@@ -454,7 +475,8 @@ private:
double _geoNearDistance;
Value _geoNearPoint;
double _searchScore;
- // When adding a field, make sure to update clone() method
+ Value _searchHighlights;
+ // When adding a field, make sure to update clone() and getMetadataApproximateSize() methods.
// Defined in document.cpp
static const DocumentStorage kEmptyDoc;
diff --git a/src/mongo/db/pipeline/document_value_test.cpp b/src/mongo/db/pipeline/document_value_test.cpp
index 438a980c10f..e758dadf2a3 100644
--- a/src/mongo/db/pipeline/document_value_test.cpp
+++ b/src/mongo/db/pipeline/document_value_test.cpp
@@ -523,6 +523,51 @@ TEST(MetaFields, RandValBasics) {
ASSERT_EQ(2.0, doc2.getRandMetaField());
}
+TEST(MetaFields, SearchScoreBasic) {
+ // Documents should not have a search score until it is set.
+ ASSERT_FALSE(Document().hasSearchScore());
+
+ // Setting the search score field should work as expected.
+ MutableDocument docBuilder;
+ docBuilder.setSearchScore(1.23);
+ Document doc = docBuilder.freeze();
+ ASSERT_TRUE(doc.hasSearchScore());
+ ASSERT_EQ(1.23, doc.getSearchScore());
+
+ // Setting the searchScore twice should keep the second value.
+ MutableDocument docBuilder2;
+ docBuilder2.setSearchScore(1.0);
+ docBuilder2.setSearchScore(2.0);
+ Document doc2 = docBuilder2.freeze();
+ ASSERT_TRUE(doc2.hasSearchScore());
+ ASSERT_EQ(2.0, doc2.getSearchScore());
+}
+
+TEST(MetaFields, SearchHighlightsBasic) {
+ // Documents should not have search highlights until they are set.
+ ASSERT_FALSE(Document().hasSearchHighlights());
+
+ // Setting the search highlights field should work as expected.
+ MutableDocument docBuilder;
+ Value highlights = DOC_ARRAY("a"_sd
+ << "b"_sd);
+ docBuilder.setSearchHighlights(highlights);
+ Document doc = docBuilder.freeze();
+ ASSERT_TRUE(doc.hasSearchHighlights());
+ ASSERT_VALUE_EQ(doc.getSearchHighlights(), highlights);
+
+ // Setting the searchHighlights twice should keep the second value.
+ MutableDocument docBuilder2;
+ Value otherHighlights = DOC_ARRAY("snippet1"_sd
+ << "snippet2"_sd
+ << "snippet3"_sd);
+ docBuilder2.setSearchHighlights(highlights);
+ docBuilder2.setSearchHighlights(otherHighlights);
+ Document doc2 = docBuilder2.freeze();
+ ASSERT_TRUE(doc2.hasSearchHighlights());
+ ASSERT_VALUE_EQ(doc2.getSearchHighlights(), otherHighlights);
+}
+
class SerializationTest : public unittest::Test {
protected:
Document roundTrip(const Document& input) {
@@ -538,10 +583,16 @@ protected:
ASSERT_DOCUMENT_EQ(output, input);
ASSERT_EQ(output.hasTextScore(), input.hasTextScore());
ASSERT_EQ(output.hasRandMetaField(), input.hasRandMetaField());
+ ASSERT_EQ(output.hasSearchScore(), input.hasSearchScore());
+ ASSERT_EQ(output.hasSearchHighlights(), input.hasSearchHighlights());
if (input.hasTextScore())
ASSERT_EQ(output.getTextScore(), input.getTextScore());
if (input.hasRandMetaField())
ASSERT_EQ(output.getRandMetaField(), input.getRandMetaField());
+ if (input.hasSearchScore())
+ ASSERT_EQ(output.getSearchScore(), input.getSearchScore());
+ if (input.hasSearchHighlights())
+ ASSERT_VALUE_EQ(output.getSearchHighlights(), input.getSearchHighlights());
ASSERT(output.toBson().binaryEqual(input.toBson()));
}
@@ -551,6 +602,9 @@ TEST_F(SerializationTest, MetaSerializationNoVals) {
MutableDocument docBuilder;
docBuilder.setTextScore(10.0);
docBuilder.setRandMetaField(20.0);
+ docBuilder.setSearchScore(30.0);
+ docBuilder.setSearchHighlights(DOC_ARRAY("abc"_sd
+ << "def"_sd));
assertRoundTrips(docBuilder.freeze());
}
@@ -559,6 +613,19 @@ TEST_F(SerializationTest, MetaSerializationWithVals) {
MutableDocument docBuilder(DOC("foo" << 10));
docBuilder.setTextScore(10.0);
docBuilder.setRandMetaField(20.0);
+ docBuilder.setSearchScore(30.0);
+ docBuilder.setSearchHighlights(DOC_ARRAY("abc"_sd
+ << "def"_sd));
+ assertRoundTrips(docBuilder.freeze());
+}
+
+TEST_F(SerializationTest, MetaSerializationSearchHighlightsNonArray) {
+ MutableDocument docBuilder;
+ docBuilder.setTextScore(10.0);
+ docBuilder.setRandMetaField(20.0);
+ docBuilder.setSearchScore(30.0);
+ // Everything should still round trip even if the searchHighlights metadata isn't an array.
+ docBuilder.setSearchHighlights(Value(1.23));
assertRoundTrips(docBuilder.freeze());
}
@@ -566,10 +633,17 @@ TEST(MetaFields, ToAndFromBson) {
MutableDocument docBuilder;
docBuilder.setTextScore(10.0);
docBuilder.setRandMetaField(20.0);
+ docBuilder.setSearchScore(30.0);
+ docBuilder.setSearchHighlights(DOC_ARRAY("abc"_sd
+ << "def"_sd));
Document doc = docBuilder.freeze();
BSONObj obj = doc.toBsonWithMetaData();
ASSERT_EQ(10.0, obj[Document::metaFieldTextScore].Double());
ASSERT_EQ(20, obj[Document::metaFieldRandVal].numberLong());
+ ASSERT_EQ(30.0, obj[Document::metaFieldSearchScore].Double());
+ ASSERT_BSONOBJ_EQ(obj[Document::metaFieldSearchHighlights].embeddedObject(),
+ BSON_ARRAY("abc"_sd
+ << "def"_sd));
Document fromBson = Document::fromBsonWithMetaData(obj);
ASSERT_TRUE(fromBson.hasTextScore());
ASSERT_TRUE(fromBson.hasRandMetaField());
@@ -577,6 +651,28 @@ TEST(MetaFields, ToAndFromBson) {
ASSERT_EQ(20, fromBson.getRandMetaField());
}
+TEST(MetaFields, MetaFieldsIncludedInDocumentApproximateSize) {
+ MutableDocument docBuilder;
+ docBuilder.setSearchHighlights(DOC_ARRAY("abc"_sd
+ << "def"_sd));
+ const size_t smallMetadataDocSize = docBuilder.freeze().getApproximateSize();
+
+ // The second document has a larger "search highlights" object.
+ MutableDocument docBuilder2;
+ docBuilder2.setSearchHighlights(DOC_ARRAY("abc"_sd
+ << "def"_sd
+ << "ghijklmnop"_sd));
+ Document doc2 = docBuilder2.freeze();
+ const size_t bigMetadataDocSize = doc2.getApproximateSize();
+ ASSERT_GT(bigMetadataDocSize, smallMetadataDocSize);
+
+ // Do a sanity check on the amount of space taken by metadata in document 2.
+ ASSERT_LT(doc2.getMetadataApproximateSize(), 200U);
+
+ Document emptyDoc;
+ ASSERT_LT(emptyDoc.getMetadataApproximateSize(), 100U);
+}
+
TEST(MetaFields, BadSerialization) {
// Write an unrecognized option to the buffer.
BufBuilder bb;
diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp
index 87a2d841e5e..0e8984a81fa 100644
--- a/src/mongo/db/pipeline/expression.cpp
+++ b/src/mongo/db/pipeline/expression.cpp
@@ -2519,6 +2519,8 @@ intrusive_ptr<Expression> ExpressionMeta::parse(
return new ExpressionMeta(expCtx, MetaType::RAND_VAL);
} else if (expr.valueStringData() == "searchScore") {
return new ExpressionMeta(expCtx, MetaType::SEARCH_SCORE);
+ } else if (expr.valueStringData() == "searchHighlights") {
+ return new ExpressionMeta(expCtx, MetaType::SEARCH_HIGHLIGHTS);
} else {
uasserted(17308, "Unsupported argument to $meta: " + expr.String());
}
@@ -2539,6 +2541,9 @@ Value ExpressionMeta::serialize(bool explain) const {
case MetaType::SEARCH_SCORE:
return Value(DOC("$meta"
<< "searchScore"_sd));
+ case MetaType::SEARCH_HIGHLIGHTS:
+ return Value(DOC("$meta"
+ << "searchHighlights"_sd));
}
MONGO_UNREACHABLE;
}
@@ -2551,6 +2556,8 @@ Value ExpressionMeta::evaluate(const Document& root) const {
return root.hasRandMetaField() ? Value(root.getRandMetaField()) : Value();
case MetaType::SEARCH_SCORE:
return root.hasSearchScore() ? Value(root.getSearchScore()) : Value();
+ case MetaType::SEARCH_HIGHLIGHTS:
+ return root.hasSearchHighlights() ? Value(root.getSearchHighlights()) : Value();
}
MONGO_UNREACHABLE;
}
@@ -2558,6 +2565,9 @@ Value ExpressionMeta::evaluate(const Document& root) const {
void ExpressionMeta::_doAddDependencies(DepsTracker* deps) const {
if (_metaType == MetaType::TEXT_SCORE) {
deps->setNeedsMetadata(DepsTracker::MetadataType::TEXT_SCORE, true);
+
+ // We do not add the dependencies for SEARCH_SCORE or SEARCH_HIGHLIGHTS because those values
+ // are not stored in the collection (or in mongod at all).
}
}
diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h
index c042fa32391..7033ee717d1 100644
--- a/src/mongo/db/pipeline/expression.h
+++ b/src/mongo/db/pipeline/expression.h
@@ -1635,6 +1635,7 @@ private:
TEXT_SCORE,
RAND_VAL,
SEARCH_SCORE,
+ SEARCH_HIGHLIGHTS,
};
ExpressionMeta(const boost::intrusive_ptr<ExpressionContext>& expCtx, MetaType metaType);
diff --git a/src/mongo/db/pipeline/expression_test.cpp b/src/mongo/db/pipeline/expression_test.cpp
index 58e1000ed08..30a5eb0dcd0 100644
--- a/src/mongo/db/pipeline/expression_test.cpp
+++ b/src/mongo/db/pipeline/expression_test.cpp
@@ -6005,6 +6005,34 @@ TEST(GetComputedPathsTest, ExpressionMapNotConsideredRenameWithDottedInputPath)
} // namespace GetComputedPathsTest
+namespace expression_meta_test {
+TEST(ExpressionMetaTest, ExpressionMetaSearchScore) {
+ intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+ VariablesParseState vps = expCtx->variablesParseState;
+ BSONObj expr = fromjson("{$meta: \"searchScore\"}");
+ auto expressionMeta = ExpressionMeta::parse(expCtx, expr.firstElement(), vps);
+
+ MutableDocument doc;
+ doc.setSearchScore(1.234);
+ Value val = expressionMeta->evaluate(doc.freeze());
+ ASSERT_EQ(val.getDouble(), 1.234);
+}
+
+TEST(ExpressionMetaTest, ExpressionMetaSearchHighlights) {
+ intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+ VariablesParseState vps = expCtx->variablesParseState;
+ BSONObj expr = fromjson("{$meta: \"searchHighlights\"}");
+ auto expressionMeta = ExpressionMeta::parse(expCtx, expr.firstElement(), vps);
+
+ MutableDocument doc;
+ Document highlights = DOC("this part" << 1 << "is opaque to the server" << 1);
+ doc.setSearchHighlights(Value(highlights));
+
+ Value val = expressionMeta->evaluate(doc.freeze());
+ ASSERT_DOCUMENT_EQ(val.getDocument(), highlights);
+}
+} // namespace expression_meta_test
+
namespace ExpressionRegexTest {
class ExpressionRegexTest {