SERVER-14738 Correctly determine if update w/ text index is in-place

(backport of 1f00ffcd22e671f5adeece53c68b5e462ba01ec0)
author: Jason Rassi <rassi@10gen.com> 2014-08-07 17:39:57 -0400
committer: Jason Rassi <rassi@10gen.com> 2014-08-07 17:39:57 -0400
commit: 3e570fc232d1e678324b80e803dabba2e41da9a0 (patch)
tree: 4ede47eca7d73a2cdaeb80f9e0f7445f90396b35
parent: 1fbb52159eb9748627ffd2ae16785ad807d3ebb1 (diff)
download: mongo-3e570fc232d1e678324b80e803dabba2e41da9a0.tar.gz
5 files changed, 224 insertions, 5 deletions
diff --git a/jstests/fts_index3.js b/jstests/fts_index3.js
new file mode 100644
index 00000000000..7fe4726d3c4
--- /dev/null
+++ b/jstests/fts_index3.js
@@ -0,0 +1,134 @@
+// Test that updates to fields in a text-indexed document are correctly reflected in the text index.
+load("jstests/libs/fts.js");
+var coll = db.fts_index3;
+var res;
+
+// 1) Create a text index on a single field, insert a document, update the value of the field, and
+// verify that $text with the new value returns the document.
+coll.drop();
+res = coll.ensureIndex({a: "text"});
+assert.isnull(res);
+coll.insert({a: "hello"});
+assert(!db.getLastError());
+assert.eq(1, coll.runCommand("text", {search: "hello"}).stats.n);
+coll.update({}, {$set: {a: "world"}});
+assert(!db.getLastError());
+assert.eq(0, coll.runCommand("text", {search: "hello"}).stats.n);
+assert.eq(1, coll.runCommand("text", {search: "world"}).stats.n);
+
+// 2) Same as #1, but with a wildcard text index.
+coll.drop();
+res = coll.ensureIndex({a: "text"});
+assert.isnull(res);
+coll.insert({a: "hello"});
+assert(!db.getLastError());
+assert.eq(1, coll.runCommand("text", {search: "hello"}).stats.n);
+coll.update({}, {$set: {a: "world"}});
+assert(!db.getLastError());
+assert.eq(0, coll.runCommand("text", {search: "hello"}).stats.n);
+assert.eq(1, coll.runCommand("text", {search: "world"}).stats.n);
+
+// 3) Create a compound text index with an index prefix, insert a document, update the value of the
+// index prefix field, and verify that $text with the new value returns the document.
+coll.drop();
+res = coll.ensureIndex({a: 1, b: "text"});
+assert.isnull(res);
+coll.insert({a: 1, b: "hello"});
+assert(!db.getLastError());
+assert.eq(1, coll.runCommand("text", {search: "hello", filter: {a: 1}}).stats.n);
+coll.update({}, {$set: {a: 2}});
+assert(!db.getLastError());
+assert.eq(0, coll.runCommand("text", {search: "hello", filter: {a: 1}}).stats.n);
+assert.eq(1, coll.runCommand("text", {search: "hello", filter: {a: 2}}).stats.n);
+
+// 4) Same as #3, but with a wildcard text index.
+coll.drop();
+res = coll.ensureIndex({a: 1, "$**": "text"});
+assert.isnull(res);
+coll.insert({a: 1, b: "hello"});
+assert(!db.getLastError());
+assert.eq(1, coll.runCommand("text", {search: "hello", filter: {a: 1}}).stats.n);
+coll.update({}, {$set: {a: 2}});
+assert(!db.getLastError());
+assert.eq(0, coll.runCommand("text", {search: "hello", filter: {a: 1}}).stats.n);
+assert.eq(1, coll.runCommand("text", {search: "hello", filter: {a: 2}}).stats.n);
+
+// 5) Create a compound text index with an index suffix, insert a document, update the value of the
+// index suffix field, and verify that $text with the new value returns the document.
+coll.drop();
+res = coll.ensureIndex({a: "text", b: 1});
+assert.isnull(res);
+coll.insert({a: "hello", b: 1});
+assert(!db.getLastError());
+assert.eq(1, coll.runCommand("text", {search: "hello", filter: {b: 1}}).stats.n);
+coll.update({}, {$set: {b: 2}});
+assert(!db.getLastError());
+assert.eq(0, coll.runCommand("text", {search: "hello", filter: {b: 1}}).stats.n);
+assert.eq(1, coll.runCommand("text", {search: "hello", filter: {b: 2}}).stats.n);
+
+// 6) Same as #5, but with a wildcard text index.
+coll.drop();
+res = coll.ensureIndex({"$**": "text", b: 1});
+assert.isnull(res);
+coll.insert({a: "hello", b: 1});
+assert(!db.getLastError());
+assert.eq(1, coll.runCommand("text", {search: "hello", filter: {b: 1}}).stats.n);
+coll.update({}, {$set: {b: 2}});
+assert(!db.getLastError());
+assert.eq(0, coll.runCommand("text", {search: "hello", filter: {b: 1}}).stats.n);
+assert.eq(1, coll.runCommand("text", {search: "hello", filter: {b: 2}}).stats.n);
+
+// 7) Create a text index on a single field, insert a document, update the language of the document
+// (so as to change the stemming), and verify that $text with the new language returns the document.
+coll.drop();
+res = coll.ensureIndex({a: "text"});
+assert.isnull(res);
+coll.insert({a: "testing", language: "es"});
+assert(!db.getLastError());
+assert.eq(1, coll.runCommand("text", {search: "testing", language: "es"}).stats.n);
+assert.eq(0, coll.runCommand("text", {search: "testing", language: "en"}).stats.n);
+coll.update({}, {$set: {language: "en"}});
+assert(!db.getLastError());
+assert.eq(0, coll.runCommand("text", {search: "testing", language: "es"}).stats.n);
+assert.eq(1, coll.runCommand("text", {search: "testing", language: "en"}).stats.n);
+
+// 8) Same as #7, but with a wildcard text index.
+coll.drop();
+res = coll.ensureIndex({"$**": "text"});
+assert.isnull(res);
+coll.insert({a: "testing", language: "es"});
+assert(!db.getLastError());
+assert.eq(1, coll.runCommand("text", {search: "testing", language: "es"}).stats.n);
+assert.eq(0, coll.runCommand("text", {search: "testing", language: "en"}).stats.n);
+coll.update({}, {$set: {language: "en"}});
+assert(!db.getLastError());
+assert.eq(0, coll.runCommand("text", {search: "testing", language: "es"}).stats.n);
+assert.eq(1, coll.runCommand("text", {search: "testing", language: "en"}).stats.n);
+
+// 10) Create a text index on a single field with a custom language override, insert a document,
+// update the language of the document (so as to change the stemming), and verify that $text with
+// the new language returns the document.
+coll.drop();
+res = coll.ensureIndex({a: "text"}, {language_override: "idioma"});
+assert.isnull(res);
+coll.insert({a: "testing", idioma: "es"});
+assert(!db.getLastError());
+assert.eq(1, coll.runCommand("text", {search: "testing", language: "es"}).stats.n);
+assert.eq(0, coll.runCommand("text", {search: "testing", language: "en"}).stats.n);
+coll.update({}, {$set: {idioma: "en"}});
+assert(!db.getLastError());
+assert.eq(0, coll.runCommand("text", {search: "testing", language: "es"}).stats.n);
+assert.eq(1, coll.runCommand("text", {search: "testing", language: "en"}).stats.n);
+
+// 11) Same as #10, but with a wildcard text index.
+coll.drop();
+res = coll.ensureIndex({"$**": "text"}, {language_override: "idioma"});
+assert.isnull(res);
+coll.insert({a: "testing", idioma: "es"});
+assert(!db.getLastError());
+assert.eq(1, coll.runCommand("text", {search: "testing", language: "es"}).stats.n);
+assert.eq(0, coll.runCommand("text", {search: "testing", language: "en"}).stats.n);
+coll.update({}, {$set: {idioma: "en"}});
+assert(!db.getLastError());
+assert.eq(0, coll.runCommand("text", {search: "testing", language: "es"}).stats.n);
+assert.eq(1, coll.runCommand("text", {search: "testing", language: "en"}).stats.n);
diff --git a/src/mongo/db/index_set.cpp b/src/mongo/db/index_set.cpp
index 65f74ed54ea..2d30e985c73 100644
--- a/src/mongo/db/index_set.cpp
+++ b/src/mongo/db/index_set.cpp
@@ -21,6 +21,8 @@
 
 namespace mongo {
 
+    IndexPathSet::IndexPathSet() : _allPathsIndexed( false ) { }  // a new set starts with the all-paths flag off
+
     void IndexPathSet::addPath( const StringData& path ) {
         string s;
         if ( getCanonicalIndexField( path, &s ) ) {
@@ -31,11 +33,20 @@ namespace mongo {
         }
     }
 
+    void IndexPathSet::allPathsIndexed() {
+        _allPathsIndexed = true;  // from now on mightBeIndexed() is true for every path, until clear()
+    }
+
     void IndexPathSet::clear() {  // reset the set to its empty state
         _canonical.clear();
+        _allPathsIndexed = false;  // also drop the "everything is indexed" mark set by allPathsIndexed()
     }
 
     bool IndexPathSet::mightBeIndexed( const StringData& path ) const {
+        if ( _allPathsIndexed ) {
+            return true;
+        }
+
         StringData use = path;
         string x;
         if ( getCanonicalIndexField( path, &x ) )
diff --git a/src/mongo/db/index_set.h b/src/mongo/db/index_set.h
index 1ff99780f07..9f4d98b60bc 100644
--- a/src/mongo/db/index_set.h
+++ b/src/mongo/db/index_set.h
@@ -32,8 +32,12 @@ namespace mongo {
 
     class IndexPathSet {
     public:
+        IndexPathSet();
+
         void addPath( const StringData& path );
 
+        void allPathsIndexed();
+
         void clear();
 
         bool mightBeIndexed( const StringData& path ) const;
@@ -43,6 +47,8 @@ namespace mongo {
         bool _startsWith( const StringData& a, const StringData& b ) const;
 
         std::set<std::string> _canonical;
+
+        bool _allPathsIndexed;
     };
 
 }
diff --git a/src/mongo/db/index_set_test.cpp b/src/mongo/db/index_set_test.cpp
index 28a1d9fe68e..19835fd7fa4 100644
--- a/src/mongo/db/index_set_test.cpp
+++ b/src/mongo/db/index_set_test.cpp
@@ -31,12 +31,37 @@ namespace mongo {
 
         ASSERT_FALSE( a.mightBeIndexed( "b" ) );
         ASSERT_FALSE( a.mightBeIndexed( "a.c" ) );
+
+        a.clear();
+        ASSERT_FALSE( a.mightBeIndexed( "a.b" ) );
     }
 
     TEST( IndexPathSetTest, Simple2 ) {
         IndexPathSet a;
         a.addPath( "ab" );
         ASSERT_FALSE( a.mightBeIndexed( "a" ) );  // registering "ab" must not make "a" look indexed
+        a.clear();
+        ASSERT_FALSE( a.mightBeIndexed( "ab" ) );  // clear() forgets the previously added path
+    }
+
+    TEST( IndexPathSetTest, AllPathsIndexed1 ) {
+        IndexPathSet a;
+        a.allPathsIndexed();
+        ASSERT_TRUE( a.mightBeIndexed( "a" ) );  // no path was added; the flag alone is enough
+        a.clear();
+        ASSERT_FALSE( a.mightBeIndexed( "a" ) );  // clear() resets the flag as well
+    }
+
+    TEST( IndexPathSetTest, AllPathsIndexed2 ) {
+        IndexPathSet a;
+        a.allPathsIndexed();
+        ASSERT_TRUE( a.mightBeIndexed( "a" ) );
+        ASSERT_TRUE( a.mightBeIndexed( "" ) );  // even the empty path is reported as indexed
+        a.addPath( "a" );  // adding an explicit path afterwards must not narrow the answer
+        ASSERT_TRUE( a.mightBeIndexed( "a" ) );
+        ASSERT_TRUE( a.mightBeIndexed( "b" ) );
+        a.clear();
+        ASSERT_FALSE( a.mightBeIndexed( "a" ) );  // clear() drops both the flag and the added path
     }
 
 
diff --git a/src/mongo/db/namespace_details.cpp b/src/mongo/db/namespace_details.cpp
index 382522bb50a..8573aff9961 100644
--- a/src/mongo/db/namespace_details.cpp
+++ b/src/mongo/db/namespace_details.cpp
@@ -24,6 +24,7 @@
 #include <boost/filesystem/operations.hpp>
 
 #include "mongo/db/db.h"
+#include "mongo/db/fts/fts_spec.h"
 #include "mongo/db/json.h"
 #include "mongo/db/mongommf.h"
 #include "mongo/db/ops/delete.h"
@@ -764,6 +765,17 @@ namespace mongo {
         get_cmap_inlock(ns).erase(ns);
     }
 
+    namespace {  // file-local helper
+        bool indexIsText(const BSONObj& keyPattern) {  // true if any key-pattern value is "text"
+            BSONObjIterator it( keyPattern );
+            while ( it.more() ) {
+                if ( str::equals( it.next().valuestrsafe(), "text" ) ) {  // e.g. {a: "text"}
+                    return true;
+                }
+            }
+            return false;  // no element's value compared equal to "text"
+        }
+    }
 
     void NamespaceDetailsTransient::computeIndexKeys() {
         _indexedPaths.clear();
@@ -772,13 +784,44 @@ namespace mongo {
         if ( ! d )
             return;
 
+        bool indexesAreLegacy = (cc().database()->getFile(0)->getHeader()->versionMinor
+                                 == PDFILE_VERSION_MINOR_22_AND_OLDER);
+
         NamespaceDetails::IndexIterator i = d->ii( true );
         while( i.more() ) {
-            BSONObj key = i.next().keyPattern();
-            BSONObjIterator j( key );
-            while ( j.more() ) {
-                BSONElement e = j.next();
-                _indexedPaths.addPath( e.fieldName() );
+            const IndexSpec& indexSpec = getIndexSpec( &(i.next()) );
+            BSONObj key = indexSpec.keyPattern;
+
+            if ( indexesAreLegacy || !indexIsText( key ) ) {
+                BSONObjIterator j( key );
+                while ( j.more() ) {
+                    BSONElement e = j.next();
+                    _indexedPaths.addPath( e.fieldName() );
+                }
+            }
+            else {
+                // This is a text index.  Get the paths for the indexed fields out of the FTSSpec.
+                fts::FTSSpec ftsSpec( indexSpec.info );
+                if ( ftsSpec.wildcard() ) {
+                    _indexedPaths.allPathsIndexed();
+                }
+                else {
+                    for ( size_t i = 0; i < ftsSpec.numExtraBefore(); ++i ) {
+                        _indexedPaths.addPath( ftsSpec.extraBefore(i) );
+                    }
+                    for ( fts::Weights::const_iterator it = ftsSpec.weights().begin();
+                          it != ftsSpec.weights().end();
+                          ++it ) {
+                        _indexedPaths.addPath( it->first );
+                    }
+                    for ( size_t i = 0; i < ftsSpec.numExtraAfter(); ++i ) {
+                        _indexedPaths.addPath( ftsSpec.extraAfter(i) );
+                    }
+                    // Note that 2.4.x supports {textIndexVersion: 1} only. {textIndexVersion: 1}
+                    // can only have one language per document, and the "language override" field
+                    // specifies the exact path to the language.
+                    _indexedPaths.addPath( ftsSpec.languageOverrideField() );
+                }
             }
         }
author	Jason Rassi <rassi@10gen.com>	2014-08-07 17:39:57 -0400
committer	Jason Rassi <rassi@10gen.com>	2014-08-07 17:39:57 -0400
commit	3e570fc232d1e678324b80e803dabba2e41da9a0 (patch)
tree	4ede47eca7d73a2cdaeb80f9e0f7445f90396b35
parent	1fbb52159eb9748627ffd2ae16785ad807d3ebb1 (diff)
download	mongo-3e570fc232d1e678324b80e803dabba2e41da9a0.tar.gz