diff options
author | Eliot Horowitz <eliot@10gen.com> | 2014-03-28 19:13:37 -0400 |
---|---|---|
committer | Eliot Horowitz <eliot@10gen.com> | 2014-03-30 13:01:13 -0400 |
commit | ee42f85b2c7497a78b56eb6e8b293305125b4cc2 (patch) | |
tree | 392940c3ef9dae0b2a8d7868df31666ad4092230 | |
parent | 7810679187eb5bd82de81cc8f152e36ec38f2c4b (diff) | |
download | mongo-ee42f85b2c7497a78b56eb6e8b293305125b4cc2.tar.gz |
SERVER-12550: insert has 3 phases:
a) outside of lock generate keys
b) inside lock touch btree pages
c) do insert
49 files changed, 1642 insertions, 731 deletions
diff --git a/jstests/core/profile4.js b/jstests/core/profile4.js index 7cf22d9fe78..bbeab90b851 100644 --- a/jstests/core/profile4.js +++ b/jstests/core/profile4.js @@ -13,8 +13,9 @@ function profileCursor() { } function lastOp() { - p = profileCursor().sort( { $natural:-1 } ).next(); -// printjson( p ); + var c = profileCursor().sort( { $natural:-1 } ); + var p = c.next(); + //printjson( p ); return p; } @@ -48,8 +49,11 @@ try { [ "keyUpdates", 0 ], [ "nreturned", 0 ], [ "responseLength", 20 ] ] ); - - t.save( {} ); + + db.eval( function(){ print( "-----xxx----" ); } ); + + x = t.save( {} ); + x = t.save( {} ); // check write lock stats are set o = lastOp(); @@ -71,11 +75,11 @@ try { t.save( {} ); t.save( {} ); - t.find().skip( 1 ).limit( 4 ).itcount(); - checkLastOp( [ [ "ntoreturn", 4 ], + t.find().skip( 1 ).limit( 5 ).itcount(); + checkLastOp( [ [ "ntoreturn", 5 ], [ "ntoskip", 1 ], - [ "nscanned", 3 ], - [ "nreturned", 2 ] ] ); + [ "nscanned", 4 ], + [ "nreturned", 3 ] ] ); t.find().batchSize( 2 ).next(); o = lastOp(); diff --git a/src/mongo/SConscript b/src/mongo/SConscript index 762dce06b60..e98c35925a4 100644 --- a/src/mongo/SConscript +++ b/src/mongo/SConscript @@ -605,6 +605,7 @@ serverOnlyFiles = [ "db/curop.cpp", "db/catalog/index_catalog.cpp", "db/catalog/index_catalog_entry.cpp", "db/catalog/index_create.cpp", + "db/catalog/index_pregen.cpp", "db/catalog/collection.cpp", "db/structure/collection_compact.cpp", "db/catalog/collection_cursor_cache.cpp", diff --git a/src/mongo/db/catalog/collection.cpp b/src/mongo/db/catalog/collection.cpp index 0793732a957..e29a1a3443f 100644 --- a/src/mongo/db/catalog/collection.cpp +++ b/src/mongo/db/catalog/collection.cpp @@ -168,7 +168,9 @@ namespace mongo { return StatusWith<DiskLoc>( loc ); } - StatusWith<DiskLoc> Collection::insertDocument( const BSONObj& docToInsert, bool enforceQuota ) { + StatusWith<DiskLoc> Collection::insertDocument( const BSONObj& docToInsert, + bool enforceQuota, + const PregeneratedKeys* 
preGen ) { if ( _indexCatalog.findIdIndex() ) { if ( docToInsert["_id"].eoo() ) { return StatusWith<DiskLoc>( ErrorCodes::InternalError, @@ -179,12 +181,12 @@ namespace mongo { if ( _details->isCapped() ) { // TOOD: old god not done - Status ret = _indexCatalog.checkNoIndexConflicts( docToInsert ); + Status ret = _indexCatalog.checkNoIndexConflicts( docToInsert, preGen ); if ( !ret.isOK() ) return StatusWith<DiskLoc>( ret ); } - StatusWith<DiskLoc> status = _insertDocument( docToInsert, enforceQuota ); + StatusWith<DiskLoc> status = _insertDocument( docToInsert, enforceQuota, preGen ); if ( status.isOK() ) { _details->paddingFits(); } @@ -214,12 +216,17 @@ namespace mongo { StatusWith<DiskLoc> Collection::_insertDocument( const BSONObj& docToInsert, - bool enforceQuota ) { + bool enforceQuota, + const PregeneratedKeys* preGen ) { // TODO: for now, capped logic lives inside NamespaceDetails, which is hidden // under the RecordStore, this feels broken since that should be a // collection access method probably + if ( preGen ) { + _indexCatalog.touch( preGen ); + } + StatusWith<DiskLoc> loc = _recordStore->insertRecord( docToInsert.objdata(), docToInsert.objsize(), enforceQuota ? 
largestFileNumberInQuota() : 0 ); @@ -229,7 +236,7 @@ namespace mongo { _infoCache.notifyOfWriteOp(); try { - _indexCatalog.indexRecord( docToInsert, loc.getValue() ); + _indexCatalog.indexRecord( docToInsert, loc.getValue(), preGen ); } catch ( AssertionException& e ) { if ( _details->isCapped() ) { @@ -348,7 +355,7 @@ namespace mongo { debug->nmoved += 1; } - StatusWith<DiskLoc> loc = _insertDocument( objNew, enforceQuota ); + StatusWith<DiskLoc> loc = _insertDocument( objNew, enforceQuota, NULL ); if ( loc.isOK() ) { // insert successful, now lets deallocate the old location diff --git a/src/mongo/db/catalog/collection.h b/src/mongo/db/catalog/collection.h index 17b88b94bdb..7afabb97c70 100644 --- a/src/mongo/db/catalog/collection.h +++ b/src/mongo/db/catalog/collection.h @@ -157,7 +157,8 @@ namespace mongo { * this does NOT modify the doc before inserting * i.e. will not add an _id field for documents that are missing it */ - StatusWith<DiskLoc> insertDocument( const BSONObj& doc, bool enforceQuota ); + StatusWith<DiskLoc> insertDocument( const BSONObj& doc, bool enforceQuota, + const PregeneratedKeys* preGen = NULL ); StatusWith<DiskLoc> insertDocument( const DocWriter* doc, bool enforceQuota ); @@ -213,7 +214,8 @@ namespace mongo { * - adjust padding */ StatusWith<DiskLoc> _insertDocument( const BSONObj& doc, - bool enforceQuota ); + bool enforceQuota, + const PregeneratedKeys* preGen ); void _compactExtent(const DiskLoc diskloc, int extentNumber, MultiIndexBlock& indexesToInsertTo, diff --git a/src/mongo/db/catalog/collection_info_cache.cpp b/src/mongo/db/catalog/collection_info_cache.cpp index 9d43035bbd4..e606fadbeb0 100644 --- a/src/mongo/db/catalog/collection_info_cache.cpp +++ b/src/mongo/db/catalog/collection_info_cache.cpp @@ -35,6 +35,7 @@ #include "mongo/db/structure/catalog/namespace_details-inl.h" #include "mongo/db/query/plan_cache.h" #include "mongo/db/catalog/collection.h" +#include "mongo/db/catalog/index_pregen.h" #include 
"mongo/util/debug_util.h" #include "mongo/db/structure/catalog/index_details.h" // XXX @@ -55,6 +56,8 @@ namespace mongo { _keysComputed = false; // query settings is not affected by info cache reset. // index filters should persist throughout life of collection + + GeneratorHolder::getInstance()->reset( _collection ); } void CollectionInfoCache::computeIndexKeys() { diff --git a/src/mongo/db/catalog/database.cpp b/src/mongo/db/catalog/database.cpp index eb8b1eb4547..3c1ce311d77 100644 --- a/src/mongo/db/catalog/database.cpp +++ b/src/mongo/db/catalog/database.cpp @@ -396,6 +396,8 @@ namespace mongo { audit::logDropCollection( currentClient.get(), fullns ); + GeneratorHolder::getInstance()->dropped( fullns.toString() ); + try { Status s = collection->getIndexCatalog()->dropAllIndexes( true ); if ( !s.isOK() ) { @@ -421,6 +423,8 @@ namespace mongo { _clearCollectionCache( fullns ); // we want to do this always + GeneratorHolder::getInstance()->dropped( fullns.toString() ); + if ( !s.isOK() ) return s; diff --git a/src/mongo/db/catalog/index_catalog.cpp b/src/mongo/db/catalog/index_catalog.cpp index 51bc8a03b1c..98af09d95eb 100644 --- a/src/mongo/db/catalog/index_catalog.cpp +++ b/src/mongo/db/catalog/index_catalog.cpp @@ -32,6 +32,7 @@ #include <vector> +#include "mongo/base/owned_pointer_map.h" #include "mongo/db/audit.h" #include "mongo/db/background.h" #include "mongo/db/catalog/collection.h" @@ -40,6 +41,7 @@ #include "mongo/db/client.h" #include "mongo/db/clientcursor.h" #include "mongo/db/curop.h" +#include "mongo/db/db.h" #include "mongo/db/field_ref.h" #include "mongo/db/index/2d_access_method.h" #include "mongo/db/index/btree_access_method.h" @@ -57,6 +59,7 @@ #include "mongo/db/keypattern.h" #include "mongo/db/kill_current_op.h" #include "mongo/db/ops/delete.h" +#include "mongo/db/pagefault.h" #include "mongo/db/query/internal_plans.h" #include "mongo/db/repl/rs.h" // this is ugly #include "mongo/db/storage/data_file.h" @@ -1006,6 +1009,11 @@ namespace 
mongo { return _prev->accessMethod(); } + IndexCatalogEntry* IndexCatalog::IndexIterator::entry( IndexDescriptor* desc ) { + invariant( desc == _prev->descriptor() ); + return _prev; + } + void IndexCatalog::IndexIterator::_advance() { _next = NULL; @@ -1066,8 +1074,14 @@ namespace mongo { if ( !keyPattern.isPrefixOf( desc->keyPattern() ) ) continue; - if( !desc->isMultikey() ) + if( !desc->isMultikey() ) { return desc; + } + else if ( desc->isIdIndex() ) { + warning() << "_id index is marked as multi-key" + << " ns: " << _collection->ns() + << " key: " << desc->keyPattern(); + } if ( !requireSingleKey ) best = desc; @@ -1136,18 +1150,19 @@ namespace mongo { Status IndexCatalog::_indexRecord( IndexCatalogEntry* index, const BSONObj& obj, - const DiskLoc &loc ) { + const DiskLoc &loc, + const PregeneratedKeysOnIndex* prep ) { InsertDeleteOptions options; options.logIfError = false; bool isUnique = - KeyPattern::isIdKeyPattern(index->descriptor()->keyPattern()) || + index->descriptor()->isIdIndex() || index->descriptor()->unique(); options.dupsAllowed = ignoreUniqueIndex( index->descriptor() ) || !isUnique; int64_t inserted; - return index->accessMethod()->insert(obj, loc, options, &inserted); + return index->accessMethod()->insert(obj, loc, options, &inserted, prep ); } Status IndexCatalog::_unindexRecord( IndexCatalogEntry* index, @@ -1168,8 +1183,31 @@ namespace mongo { return Status::OK(); } + void IndexCatalog::touch( const PregeneratedKeys* preGen ) const { + if ( !cc().allowedToThrowPageFaultException() ) { + // no point touching if we can't throw + return; + } + + // touch what we can + for ( IndexCatalogEntryContainer::const_iterator i = _entries.begin(); + i != _entries.end(); + ++i ) { + + IndexCatalogEntry* entry = *i; + shared_ptr<KeyGenerator> gen = entry->accessMethod()->getKeyGenerator(); + const PregeneratedKeysOnIndex* perIndex = preGen->get( entry ); + if ( perIndex && + perIndex->generator->getId() == gen->getId() ) { + 
entry->accessMethod()->touch( perIndex->keys ); + } + } + + } - void IndexCatalog::indexRecord( const BSONObj& obj, const DiskLoc &loc ) { + void IndexCatalog::indexRecord( const BSONObj& obj, + const DiskLoc &loc, + const PregeneratedKeys* preGen ) { for ( IndexCatalogEntryContainer::const_iterator i = _entries.begin(); i != _entries.end(); @@ -1177,8 +1215,12 @@ namespace mongo { IndexCatalogEntry* entry = *i; + const PregeneratedKeysOnIndex* perIndex = NULL; + if ( preGen ) + perIndex = preGen->get( entry ); + try { - Status s = _indexRecord( entry, obj, loc ); + Status s = _indexRecord( entry, obj, loc, perIndex ); uassert(s.location(), s.reason(), s.isOK() ); } catch ( AssertionException& ae ) { @@ -1222,7 +1264,9 @@ namespace mongo { } - Status IndexCatalog::checkNoIndexConflicts( const BSONObj &obj ) { + Status IndexCatalog::checkNoIndexConflicts( const BSONObj &obj, + const PregeneratedKeys* preGen ) { + // TODO: preGen IndexIterator ii = getIndexIterator( true ); while ( ii.more() ) { IndexDescriptor* descriptor = ii.next(); diff --git a/src/mongo/db/catalog/index_catalog.h b/src/mongo/db/catalog/index_catalog.h index d1852b13a7a..8d8c6a2942c 100644 --- a/src/mongo/db/catalog/index_catalog.h +++ b/src/mongo/db/catalog/index_catalog.h @@ -33,6 +33,7 @@ #include <vector> #include "mongo/db/catalog/index_catalog_entry.h" +#include "mongo/db/catalog/index_pregen.h" #include "mongo/db/diskloc.h" #include "mongo/db/jsobj.h" #include "mongo/platform/unordered_map.h" @@ -113,6 +114,8 @@ namespace mongo { // returns the access method for the last return IndexDescriptor IndexAccessMethod* accessMethod( IndexDescriptor* desc ); + + IndexCatalogEntry* entry( IndexDescriptor* desc ); private: IndexIterator( const IndexCatalog* cat, bool includeUnfinishedIndexes ); @@ -235,8 +238,14 @@ namespace mongo { // ----- data modifiers ------ + /** + * TODO: document + */ + void touch( const PregeneratedKeys* preGen ) const; + // this throws for now - void indexRecord( const 
BSONObj& obj, const DiskLoc &loc ); + void indexRecord( const BSONObj& obj, const DiskLoc &loc, + const PregeneratedKeys* preGen = NULL ); void unindexRecord( const BSONObj& obj, const DiskLoc& loc, bool noWarn ); @@ -244,7 +253,7 @@ namespace mongo { * checks all unique indexes and checks for conflicts * should not throw */ - Status checkNoIndexConflicts( const BSONObj& obj ); + Status checkNoIndexConflicts( const BSONObj& obj, const PregeneratedKeys* preGen ); // ------- temp internal ------- @@ -285,7 +294,10 @@ namespace mongo { // meaning we shouldn't modify catalog Status _checkUnfinished() const; - Status _indexRecord( IndexCatalogEntry* index, const BSONObj& obj, const DiskLoc &loc ); + Status _indexRecord( IndexCatalogEntry* index, + const BSONObj& obj, const DiskLoc &loc, + const PregeneratedKeysOnIndex* pregen ); + Status _unindexRecord( IndexCatalogEntry* index, const BSONObj& obj, const DiskLoc &loc, bool logIfError ); diff --git a/src/mongo/db/catalog/index_pregen.cpp b/src/mongo/db/catalog/index_pregen.cpp new file mode 100644 index 00000000000..37f26dc8233 --- /dev/null +++ b/src/mongo/db/catalog/index_pregen.cpp @@ -0,0 +1,137 @@ +// index_pregen.cpp + +/** +* Copyright (C) 2014 MongoDB Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. 
+* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. +*/ + +#include "mongo/db/catalog/collection.h" +#include "mongo/db/catalog/index_catalog.h" +#include "mongo/db/catalog/index_pregen.h" +#include "mongo/db/d_concurrency.h" +#include "mongo/db/index/index_access_method.h" + +namespace mongo { + + const PregeneratedKeysOnIndex* PregeneratedKeys::get( IndexCatalogEntry* entry ) const { + Map::const_iterator i = _indexes.find( entry ); + if ( i == _indexes.end() ) + return NULL; + return &i->second; + } + + void PregeneratedKeys::gen( const BSONObj& obj, + IndexCatalogEntry* entry, + const boost::shared_ptr<KeyGenerator>& generator ) { + PregeneratedKeysOnIndex& onIndex = _indexes[entry]; + onIndex.generator = generator; + generator->getKeys( obj, &onIndex.keys ); + } + + GeneratorHolder::GeneratorHolder() + : _collectionsLock( "GeneratorHolder" ) { + } + + bool GeneratorHolder::prepare( const StringData& ns, + const BSONObj& obj, + PregeneratedKeys* out ) { + invariant( out ); + + if ( Lock::isLocked() ) { + // ewww, we do nothing + return true; + } + + shared_ptr<MyCollection> collection; + { + string temp = ns.toString(); + SimpleMutex::scoped_lock lk( _collectionsLock ); + Collections::const_iterator i = _collections.find( temp ); + if ( 
i == _collections.end() ) + return false; + collection = i->second; + } + + try { + for ( size_t i = 0; i < collection->indexes.size(); i++ ) { + out->gen( obj, collection->indexes[i].entry, collection->indexes[i].generator ); + } + } + catch ( DBException& e ) { + log() << "GeneratorHolder::prepare failed: " << e; + out->clear(); + return false; + } + return true; + } + + void GeneratorHolder::reset( const Collection* aCollection ) { + shared_ptr<MyCollection> myCollection( new MyCollection() ); + myCollection->ns = aCollection->ns().ns(); + + IndexCatalog::IndexIterator ii = aCollection->getIndexCatalog()->getIndexIterator( true ); + while ( ii.more() ) { + IndexDescriptor* desc = ii.next(); + IndexCatalogEntry* entry = ii.entry( desc ); + + MyIndex myIndex; + myIndex.entry = entry; + myIndex.generator = entry->accessMethod()->getKeyGenerator(); + + myCollection->indexes.push_back( myIndex ); + } + + SimpleMutex::scoped_lock lk( _collectionsLock ); + _collections[aCollection->ns().ns()] = myCollection; + } + + void GeneratorHolder::dropped( const std::string& ns ) { + SimpleMutex::scoped_lock lk( _collectionsLock ); + _collections.erase( ns ); + } + + void GeneratorHolder::droppedDatabase( const std::string& db ) { + SimpleMutex::scoped_lock lk( _collectionsLock ); + vector<string> toDrop; + for ( Collections::const_iterator i = _collections.begin(); i != _collections.end(); ++i ) { + StringData temp = nsToDatabaseSubstring( i->first ); + if ( temp == db ) + toDrop.push_back( i->first ); + } + + for ( size_t i = 0; i < toDrop.size(); i++ ) { + _collections.erase( toDrop[i] ); + } + } + + namespace { + // this is ok because we don't access this via the construction of any globals + GeneratorHolder theHolder; + } + GeneratorHolder* GeneratorHolder::getInstance() { + return &theHolder; + } +} diff --git a/src/mongo/db/catalog/index_pregen.h b/src/mongo/db/catalog/index_pregen.h new file mode 100644 index 00000000000..1f133f28d66 --- /dev/null +++ 
b/src/mongo/db/catalog/index_pregen.h @@ -0,0 +1,126 @@ +// index_pregen.h + +/** +* Copyright (C) 2014 MongoDB Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. 
+*/ + +#pragma once + +#include <map> +#include <string> +#include <vector> + +#include "mongo/base/disallow_copying.h" +#include "mongo/bson/bsonobj.h" +#include "mongo/db/index/key_generator.h" +#include "mongo/util/assert_util.h" +#include "mongo/util/concurrency/mutex.h" + +/** + * This entire thing goes away with document level locking + */ +namespace mongo { + + class Collection; + class IndexCatalogEntry; + + /** + * One per index for pregenerated keys + */ + struct PregeneratedKeysOnIndex { + PregeneratedKeysOnIndex() {} + PregeneratedKeysOnIndex( const PregeneratedKeysOnIndex& other ) { + invariant( keys.empty() ); + generator = other.generator; + } + boost::shared_ptr<KeyGenerator> generator; + BSONObjSet keys; + }; + + /** + * one per document + * not thread safe + */ + class PregeneratedKeys { + public: + PregeneratedKeys(){} + PregeneratedKeys( const PregeneratedKeys& other ){ + // we need the copy construct for insertion into map + // we don't want to actually copy data though + // so we fail in that case + invariant( _indexes.empty() ); + } + const PregeneratedKeysOnIndex* get( IndexCatalogEntry* entry ) const; + + void gen( const BSONObj& obj, + IndexCatalogEntry* entry, + const boost::shared_ptr<KeyGenerator>& generator ); + + void clear() { _indexes.clear(); } + private: + typedef std::map<IndexCatalogEntry*,PregeneratedKeysOnIndex> Map; + Map _indexes; + }; + + /** + * this is a singleten + */ + class GeneratorHolder { + public: + GeneratorHolder(); + + /** + * @return if we have a cache entry for this + */ + bool prepare( const StringData& ns, + const BSONObj& obj, + PregeneratedKeys* out ); + + void reset( const Collection* aCollection ); + + void dropped( const std::string& ns ); + + void droppedDatabase( const std::string& ns ); + + static GeneratorHolder* getInstance(); + + private: + struct MyIndex { + IndexCatalogEntry* entry; // cannot use as a pointer, just a number + boost::shared_ptr<KeyGenerator> generator; // safe to use + }; + + 
struct MyCollection { + std::string ns; + std::vector<MyIndex> indexes; + }; + + typedef std::map< string,boost::shared_ptr<MyCollection> > Collections; // map from namespace to the indexes + SimpleMutex _collectionsLock; // for modifying the map structure itself + Collections _collections; + }; +} diff --git a/src/mongo/db/commands/write_commands/batch_executor.cpp b/src/mongo/db/commands/write_commands/batch_executor.cpp index 348c5f763f9..48743cc8090 100644 --- a/src/mongo/db/commands/write_commands/batch_executor.cpp +++ b/src/mongo/db/commands/write_commands/batch_executor.cpp @@ -612,6 +612,7 @@ namespace mongo { static void singleInsert( const BSONObj& docToInsert, Collection* collection, + const PregeneratedKeys* pregen, WriteOpResult* result ); static void singleCreateIndex( const BSONObj& indexDesc, @@ -684,6 +685,9 @@ namespace mongo { // index both. std::vector<StatusWith<BSONObj> > normalizedInserts; + // We generate these outside of any locks + std::vector<PregeneratedKeys> pregeneratedKeys; + private: bool _lockAndCheckImpl(WriteOpResult* result); @@ -758,7 +762,8 @@ namespace mongo { // Goes over the request and preprocesses normalized versions of all the inserts in the request static void normalizeInserts( const BatchedCommandRequest& request, - vector<StatusWith<BSONObj> >* normalizedInserts ) { + vector<StatusWith<BSONObj> >* normalizedInserts, + vector<PregeneratedKeys>* pregen ) { normalizedInserts->reserve(request.sizeWriteOps()); for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) { @@ -767,6 +772,30 @@ namespace mongo { normalizedInserts->push_back( normalInsert ); if ( request.getOrdered() && !normalInsert.isOK() ) break; + + if ( !normalInsert.getValue().isEmpty() ) + insertDoc = normalInsert.getValue(); + + pregen->push_back( PregeneratedKeys() ); + bool hasEntry = GeneratorHolder::getInstance()->prepare( request.getTargetingNS(), + insertDoc, + &pregen->back() ); + + if ( !hasEntry && i == 0 ) { + string ns = request.getTargetingNS(); 
+ try { + Client::ReadContext ctx(ns); + Database* db = ctx.ctx().db(); + Collection * c = db->getCollection( ns ); + if ( c ) { + GeneratorHolder::getInstance()->reset( c ); + } + } + catch ( DBException& e ) { + log() << "normalizeInserts failde to pregen keys: " << e; + } + } + } } @@ -793,7 +822,7 @@ namespace mongo { // further insertOne calls, and stop the batch. As a result, the only expected source of // such exceptions are interruptions. ExecInsertsState state(&request); - normalizeInserts(request, &state.normalizedInserts); + normalizeInserts(request, &state.normalizedInserts, &state.pregeneratedKeys); for (state.currIndex = 0; state.currIndex < state.request->sizeWriteOps(); ++state.currIndex) { @@ -965,7 +994,10 @@ namespace mongo { } if (!state->request->isInsertIndexRequest()) { - singleInsert(insertDoc, state->getCollection(), result); + const PregeneratedKeys* pregen = NULL; + if ( state->pregeneratedKeys.size() > state->currIndex ) + pregen = &state->pregeneratedKeys[state->currIndex]; + singleInsert(insertDoc, state->getCollection(), pregen, result); } else { singleCreateIndex(insertDoc, state->getCollection(), result); @@ -1029,13 +1061,14 @@ namespace mongo { */ static void singleInsert( const BSONObj& docToInsert, Collection* collection, + const PregeneratedKeys* pregen, WriteOpResult* result ) { const string& insertNS = collection->ns().ns(); Lock::assertWriteLocked( insertNS ); - StatusWith<DiskLoc> status = collection->insertDocument( docToInsert, true ); + StatusWith<DiskLoc> status = collection->insertDocument( docToInsert, true, pregen ); if ( !status.isOK() ) { result->setError(toWriteError(status.getStatus())); diff --git a/src/mongo/db/dbhelpers.cpp b/src/mongo/db/dbhelpers.cpp index 33e3d2b3946..f670a4d7157 100644 --- a/src/mongo/db/dbhelpers.cpp +++ b/src/mongo/db/dbhelpers.cpp @@ -469,6 +469,7 @@ namespace mongo { if ( !collection ) return Status( ErrorCodes::NamespaceNotFound, ns ); // Require single key + IndexDescriptor *idx = 
collection->getIndexCatalog()->findIndexByPrefix( range.keyPattern, true ); diff --git a/src/mongo/db/index/2d_access_method.cpp b/src/mongo/db/index/2d_access_method.cpp index 7a5cbc9ff0b..5e1680cf10a 100644 --- a/src/mongo/db/index/2d_access_method.cpp +++ b/src/mongo/db/index/2d_access_method.cpp @@ -34,7 +34,6 @@ #include "mongo/db/geo/core.h" #include "mongo/db/index_names.h" #include "mongo/db/index/2d_common.h" -#include "mongo/db/index/expression_keys_private.h" #include "mongo/db/index/expression_params.h" #include "mongo/db/jsobj.h" #include "mongo/db/pdfile.h" @@ -47,16 +46,18 @@ namespace mongo { const IndexDescriptor* descriptor = btreeState->descriptor(); ExpressionParams::parseTwoDParams(descriptor->infoObj(), &_params); + + _keyGenerator.reset( new TwoDKeyGenerator( _params ) ); } /** Finds the key objects to put in an index */ void TwoDAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { - ExpressionKeysPrivate::get2DKeys(obj, _params, keys, NULL); + _keyGenerator->getKeys( obj, keys ); } /** Finds all locations in a geo-indexed object */ void TwoDAccessMethod::getKeys(const BSONObj& obj, vector<BSONObj>& locs) const { - ExpressionKeysPrivate::get2DKeys(obj, _params, NULL, &locs); + _keyGenerator->getKeys( obj, NULL, &locs ); } } // namespace mongo diff --git a/src/mongo/db/index/2d_access_method.h b/src/mongo/db/index/2d_access_method.h index 594ae3ab27c..a9894cbf05e 100644 --- a/src/mongo/db/index/2d_access_method.h +++ b/src/mongo/db/index/2d_access_method.h @@ -30,6 +30,7 @@ #include "mongo/base/status.h" #include "mongo/db/index/2d_common.h" +#include "mongo/db/index/2d_key_generator.h" #include "mongo/db/index/btree_based_access_method.h" #include "mongo/db/jsobj.h" @@ -72,6 +73,7 @@ namespace mongo { TwoDAccessMethod(IndexCatalogEntry* btreeState); virtual ~TwoDAccessMethod() { } + virtual shared_ptr<KeyGenerator> getKeyGenerator() const { return _keyGenerator; } private: friend class TwoDIndexCursor; friend class 
twod_internal::GeoPoint; @@ -104,6 +106,8 @@ namespace mongo { virtual void getKeys(const BSONObj& obj, BSONObjSet* keys); TwoDIndexingParams _params; + + shared_ptr<TwoDKeyGenerator> _keyGenerator; }; } // namespace mongo diff --git a/src/mongo/db/index/2d_key_generator.cpp b/src/mongo/db/index/2d_key_generator.cpp new file mode 100644 index 00000000000..dcbaccd68e9 --- /dev/null +++ b/src/mongo/db/index/2d_key_generator.cpp @@ -0,0 +1,149 @@ +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. 
+*/ + +#include "mongo/db/index/2d_access_method.h" + +#include <string> +#include <vector> + +#include "mongo/db/geo/core.h" +#include "mongo/db/index_names.h" +#include "mongo/db/index/2d_common.h" +#include "mongo/db/index/expression_params.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/pdfile.h" + +namespace mongo { + + + TwoDKeyGenerator::TwoDKeyGenerator( const TwoDIndexingParams& params ) + : _params( params ) { + } + + void TwoDKeyGenerator::getKeys(const BSONObj& obj, BSONObjSet* keys) const { + getKeys( obj, keys, NULL ); + } + + void TwoDKeyGenerator::getKeys(const BSONObj& obj, + BSONObjSet* keys, + std::vector<BSONObj>* locs ) const { + BSONElementMSet bSet; + + // Get all the nested location fields, but don't return individual elements from + // the last array, if it exists. + obj.getFieldsDotted(_params.geo.c_str(), bSet, false); + + if (bSet.empty()) + return; + + for (BSONElementMSet::iterator setI = bSet.begin(); setI != bSet.end(); ++setI) { + BSONElement geo = *setI; + + if (geo.eoo() || !geo.isABSONObj()) + continue; + + // + // Grammar for location lookup: + // locs ::= [loc,loc,...,loc]|{<k>:loc,<k>:loc,...,<k>:loc}|loc + // loc ::= { <k1> : #, <k2> : # }|[#, #]|{} + // + // Empty locations are ignored, preserving single-location semantics + // + + BSONObj embed = geo.embeddedObject(); + if (embed.isEmpty()) + continue; + + // Differentiate between location arrays and locations + // by seeing if the first element value is a number + bool singleElement = embed.firstElement().isNumber(); + + BSONObjIterator oi(embed); + + while (oi.more()) { + BSONObj locObj; + + if (singleElement) { + locObj = embed; + } else { + BSONElement locElement = oi.next(); + + uassert(16804, mongoutils::str::stream() << + "location object expected, location array not in correct format", + locElement.isABSONObj()); + + locObj = locElement.embeddedObject(); + if(locObj.isEmpty()) + continue; + } + + BSONObjBuilder b(64); + + // Remember the actual location object 
if needed + if (locs) + locs->push_back(locObj); + + // Stop if we don't need to get anything but location objects + if (!keys) { + if (singleElement) break; + else continue; + } + + _params.geoHashConverter->hash(locObj, &obj).appendToBuilder(&b, ""); + + // Go through all the other index keys + for (vector<pair<string, int> >::const_iterator i = _params.other.begin(); + i != _params.other.end(); ++i) { + // Get *all* fields for the index key + BSONElementSet eSet; + obj.getFieldsDotted(i->first, eSet); + + if (eSet.size() == 0) + b.appendNull(""); + else if (eSet.size() == 1) + b.appendAs(*(eSet.begin()), ""); + else { + // If we have more than one key, store as an array of the objects + BSONArrayBuilder aBuilder; + + for (BSONElementSet::iterator ei = eSet.begin(); ei != eSet.end(); + ++ei) { + aBuilder.append(*ei); + } + + b.append("", aBuilder.arr()); + } + } + keys->insert(b.obj()); + if(singleElement) break; + } + } + } + + +} // namespace mongo diff --git a/src/mongo/db/index/2d_key_generator.h b/src/mongo/db/index/2d_key_generator.h new file mode 100644 index 00000000000..b612819a42b --- /dev/null +++ b/src/mongo/db/index/2d_key_generator.h @@ -0,0 +1,59 @@ +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. 
+* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. +*/ + +#pragma once + +#include "mongo/base/status.h" +#include "mongo/db/index/2d_common.h" +#include "mongo/db/index/btree_based_access_method.h" +#include "mongo/db/jsobj.h" + +namespace mongo { + + class IndexCatalogEntry; + class IndexCursor; + class IndexDescriptor; + struct TwoDIndexingParams; + + class TwoDKeyGenerator : public KeyGenerator { + public: + TwoDKeyGenerator( const TwoDIndexingParams& params ); + virtual ~TwoDKeyGenerator() {} + + virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const; + + virtual void getKeys(const BSONObj& obj, + BSONObjSet* keys, + std::vector<BSONObj>* locs ) const; + + private: + TwoDIndexingParams _params; + }; + + +} // namespace mongo diff --git a/src/mongo/db/index/SConscript b/src/mongo/db/index/SConscript index c8d757d150c..d651a7c7be4 100644 --- a/src/mongo/db/index/SConscript +++ b/src/mongo/db/index/SConscript @@ -5,8 +5,13 @@ Import("env") env.Library( target='key_generator', source=[ + '2d_key_generator.cpp', 'btree_key_generator.cpp', - 'expression_keys_private.cpp', + 'fts_key_generator.cpp', + 'hash_key_generator.cpp', + 'haystack_key_generator.cpp', + 'key_generator.cpp', + 's2_key_generator.cpp', ], LIBDEPS=[ 
'$BUILD_DIR/mongo/bson', diff --git a/src/mongo/db/index/btree_access_method.h b/src/mongo/db/index/btree_access_method.h index 66cb0bf5fde..779dfe80b56 100644 --- a/src/mongo/db/index/btree_access_method.h +++ b/src/mongo/db/index/btree_access_method.h @@ -57,11 +57,13 @@ namespace mongo { BtreeAccessMethod(IndexCatalogEntry* btreeState ); virtual ~BtreeAccessMethod() { } + virtual shared_ptr<KeyGenerator> getKeyGenerator() const { return _keyGenerator; } private: virtual void getKeys(const BSONObj& obj, BSONObjSet* keys); // Our keys differ for V0 and V1. - scoped_ptr<BtreeKeyGenerator> _keyGenerator; + // this is shared so that we can pass it off + shared_ptr<BtreeKeyGenerator> _keyGenerator; }; } // namespace mongo diff --git a/src/mongo/db/index/btree_based_access_method.cpp b/src/mongo/db/index/btree_based_access_method.cpp index 16ce53fa001..729d1226be0 100644 --- a/src/mongo/db/index/btree_based_access_method.cpp +++ b/src/mongo/db/index/btree_based_access_method.cpp @@ -56,18 +56,27 @@ namespace mongo { } // Find the keys for obj, put them in the tree pointing to loc - Status BtreeBasedAccessMethod::insert(const BSONObj& obj, const DiskLoc& loc, - const InsertDeleteOptions& options, int64_t* numInserted) { + Status BtreeBasedAccessMethod::insert(const BSONObj& obj, + const DiskLoc& loc, + const InsertDeleteOptions& options, + int64_t* numInserted, + const PregeneratedKeysOnIndex* prepared ) { - *numInserted = 0; + const BSONObjSet* keysToUse = NULL; - BSONObjSet keys; - // Delegate to the subclass. 
- getKeys(obj, &keys); + BSONObjSet myOwnedKeys; + + if ( prepared && prepared->generator->getId() == getKeyGenerator()->getId() ) { + keysToUse = &prepared->keys; + } + else { + getKeys(obj, &myOwnedKeys); + keysToUse = &myOwnedKeys; + } - Status ret = Status::OK(); + *numInserted = 0; - for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) { + for (BSONObjSet::const_iterator i = keysToUse->begin(); i != keysToUse->end(); ++i) { try { _interface->bt_insert(_btreeState, _btreeState->head(), @@ -76,23 +85,26 @@ namespace mongo { options.dupsAllowed, true); ++*numInserted; - } catch (AssertionException& e) { + } + catch (AssertionException& e) { if (10287 == e.getCode() && !_btreeState->isReady()) { // This is the duplicate key exception. We ignore it for some reason in BG // indexing. DEV log() << "info: key already in index during bg indexing (ok)\n"; - } else if (!options.dupsAllowed) { + } + else if (!options.dupsAllowed) { // Assuming it's a duplicate key exception. Clean up any inserted keys. 
- for (BSONObjSet::const_iterator j = keys.begin(); j != i; ++j) { + for (BSONObjSet::const_iterator j = keysToUse->begin(); j != i; ++j) { removeOneKey(*j, loc); } *numInserted = 0; return Status(ErrorCodes::DuplicateKey, e.what(), e.getCode()); - } else { + } + else { problem() << " caught assertion addKeysToIndex " << _descriptor->indexNamespace() << obj["_id"] << endl; - ret = Status(ErrorCodes::InternalError, e.what(), e.getCode()); + return Status(ErrorCodes::InternalError, e.what(), e.getCode()); } } } @@ -101,7 +113,7 @@ namespace mongo { _btreeState->setMultikey(); } - return ret; + return Status::OK(); } bool BtreeBasedAccessMethod::removeOneKey(const BSONObj& key, const DiskLoc& loc) { @@ -194,7 +206,10 @@ namespace mongo { Status BtreeBasedAccessMethod::touch(const BSONObj& obj) { BSONObjSet keys; getKeys(obj, &keys); + return touch( keys ); + } + Status BtreeBasedAccessMethod::touch(const BSONObjSet& keys) { for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) { int unusedPos; bool unusedFound; @@ -317,10 +332,15 @@ namespace mongo { ~BtreeBulk() {} + virtual shared_ptr<KeyGenerator> getKeyGenerator() const { + invariant( false ); + } + virtual Status insert(const BSONObj& obj, const DiskLoc& loc, const InsertDeleteOptions& options, - int64_t* numInserted) { + int64_t* numInserted, + const PregeneratedKeysOnIndex* pregen ) { BSONObjSet keys; _real->getKeys(obj, &keys); _phase1.addKeys(keys, loc, false); @@ -371,6 +391,10 @@ namespace mongo { return _notAllowed(); } + virtual Status touch(const BSONObjSet& obj) { + return _notAllowed(); + } + virtual Status validate(int64_t* numKeys) { return _notAllowed(); } diff --git a/src/mongo/db/index/btree_based_access_method.h b/src/mongo/db/index/btree_based_access_method.h index a99db4db6a1..714b33f6a59 100644 --- a/src/mongo/db/index/btree_based_access_method.h +++ b/src/mongo/db/index/btree_based_access_method.h @@ -64,7 +64,8 @@ namespace mongo { virtual Status insert(const BSONObj& obj, 
const DiskLoc& loc, const InsertDeleteOptions& options, - int64_t* numInserted); + int64_t* numInserted, + const PregeneratedKeysOnIndex* prepared = NULL ) ; virtual Status remove(const BSONObj& obj, const DiskLoc& loc, @@ -91,6 +92,8 @@ namespace mongo { virtual Status touch(const BSONObj& obj); + virtual Status touch( const BSONObjSet& keys ); + virtual Status validate(int64_t* numKeys); // XXX: consider migrating callers to use IndexCursor instead diff --git a/src/mongo/db/index/btree_key_generator.cpp b/src/mongo/db/index/btree_key_generator.cpp index bc1910448da..2a88148ef6d 100644 --- a/src/mongo/db/index/btree_key_generator.cpp +++ b/src/mongo/db/index/btree_key_generator.cpp @@ -36,7 +36,10 @@ namespace mongo { BtreeKeyGenerator::BtreeKeyGenerator(vector<const char*> fieldNames, vector<BSONElement> fixed, bool isSparse) - : _fieldNames(fieldNames), _isSparse(isSparse), _fixed(fixed) { + : _isSparse(isSparse), _fixed(fixed) { + + for ( size_t i = 0; i < fieldNames.size(); i++ ) + _fieldNames.push_back( fieldNames[i] ); BSONObjBuilder nullKeyBuilder; for (size_t i = 0; i < fieldNames.size(); ++i) { @@ -52,7 +55,10 @@ namespace mongo { void BtreeKeyGenerator::getKeys(const BSONObj &obj, BSONObjSet *keys) const { // These are mutated as part of the getKeys call. :| - vector<const char*> fieldNames(_fieldNames); + vector<const char*> fieldNames; + for ( size_t i = 0; i < _fieldNames.size(); i++ ) + fieldNames.push_back( _fieldNames[i].c_str() ); + vector<BSONElement> fixed(_fixed); getKeysImpl(fieldNames, fixed, obj, keys); if (keys->empty() && ! 
_isSparse) { diff --git a/src/mongo/db/index/btree_key_generator.h b/src/mongo/db/index/btree_key_generator.h index 1ae3e75c1c1..eba22214b12 100644 --- a/src/mongo/db/index/btree_key_generator.h +++ b/src/mongo/db/index/btree_key_generator.h @@ -31,6 +31,7 @@ #include <vector> #include <set> #include "mongo/db/jsobj.h" +#include "mongo/db/index/key_generator.h" namespace mongo { @@ -38,7 +39,7 @@ namespace mongo { * Internal class used by BtreeAccessMethod to generate keys for indexed documents. * This class is meant to be kept under the index access layer. */ - class BtreeKeyGenerator { + class BtreeKeyGenerator : public KeyGenerator { public: BtreeKeyGenerator(vector<const char*> fieldNames, vector<BSONElement> fixed, bool isSparse); virtual ~BtreeKeyGenerator() { } @@ -49,7 +50,7 @@ namespace mongo { protected: // These are used by the getKeysImpl(s) below. - vector<const char*> _fieldNames; + vector<string> _fieldNames; bool _isSparse; BSONObj _nullKey; // a full key with all fields null BSONObj _nullObj; // only used for _nullElt diff --git a/src/mongo/db/index/expression_keys_private.cpp b/src/mongo/db/index/expression_keys_private.cpp deleted file mode 100644 index 0f94a7b7b58..00000000000 --- a/src/mongo/db/index/expression_keys_private.cpp +++ /dev/null @@ -1,500 +0,0 @@ -/** - * Copyright (C) 2014 MongoDB Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/index/expression_keys_private.h" - -#include <utility> - -#include "mongo/db/fts/fts_index_format.h" -#include "mongo/db/geo/geoconstants.h" -#include "mongo/db/geo/geoparser.h" -#include "mongo/db/geo/geoquery.h" -#include "mongo/db/geo/s2common.h" -#include "mongo/db/geo/s2.h" -#include "mongo/db/index_names.h" -#include "mongo/db/index/2d_common.h" -#include "mongo/util/assert_util.h" -#include "mongo/util/mongoutils/str.h" -#include "third_party/s2/s2cell.h" -#include "third_party/s2/s2regioncoverer.h" - -namespace { - - using namespace mongo; - - // - // Helper functions for getHaystackKeys - // - - /** - * Build a new BSONObj with root in it. If e is non-empty, append that to the key. - * Insert the BSONObj into keys. - * Used by getHaystackKeys. 
- */ - void addKey(const string& root, const BSONElement& e, BSONObjSet* keys) { - BSONObjBuilder buf; - buf.append("", root); - - if (e.eoo()) - buf.appendNull(""); - else - buf.appendAs(e, ""); - - keys->insert(buf.obj()); - } - - // - // Helper functions for getS2Keys - // - - static void S2KeysFromRegion(S2RegionCoverer *coverer, const S2Region ®ion, - vector<string> *out) { - vector<S2CellId> covering; - coverer->GetCovering(region, &covering); - for (size_t i = 0; i < covering.size(); ++i) { - out->push_back(covering[i].toString()); - } - } - - - bool S2GetKeysForObject(const BSONObj& obj, - const S2IndexingParams& params, - vector<string>* out) { - S2RegionCoverer coverer; - params.configureCoverer(&coverer); - - GeometryContainer geoContainer; - if (!geoContainer.parseFrom(obj)) { return false; } - - // Only certain geometries can be indexed in the old index format S2_INDEX_VERSION_1. See - // definition of S2IndexVersion for details. - if (params.indexVersion == S2_INDEX_VERSION_1 && !geoContainer.isSimpleContainer()) { - return false; - } - - if (!geoContainer.hasS2Region()) { return false; } - - S2KeysFromRegion(&coverer, geoContainer.getRegion(), out); - - return true; - } - - - /** - * Get the index keys for elements that are GeoJSON. - * Used by getS2Keys. 
- */ - void getS2GeoKeys(const BSONObj& document, const BSONElementSet& elements, - const S2IndexingParams& params, - BSONObjSet* out) { - for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { - uassert(16754, "Can't parse geometry from element: " + i->toString(), - i->isABSONObj()); - const BSONObj &geoObj = i->Obj(); - - vector<string> cells; - bool succeeded = S2GetKeysForObject(geoObj, params, &cells); - uassert(16755, "Can't extract geo keys from object, malformed geometry?: " - + document.toString(), succeeded); - - uassert(16756, "Unable to generate keys for (likely malformed) geometry: " - + document.toString(), - cells.size() > 0); - - for (vector<string>::const_iterator it = cells.begin(); it != cells.end(); ++it) { - BSONObjBuilder b; - b.append("", *it); - out->insert(b.obj()); - } - } - - if (0 == out->size()) { - BSONObjBuilder b; - b.appendNull(""); - out->insert(b.obj()); - } - } - - /** - * Expands array and appends items to 'out'. - * Used by getOneLiteralKey. - */ - void getS2LiteralKeysArray(const BSONObj& obj, BSONObjSet* out) { - BSONObjIterator objIt(obj); - if (!objIt.more()) { - // Empty arrays are indexed as undefined. - BSONObjBuilder b; - b.appendUndefined(""); - out->insert(b.obj()); - } else { - // Non-empty arrays are exploded. - while (objIt.more()) { - BSONObjBuilder b; - b.appendAs(objIt.next(), ""); - out->insert(b.obj()); - } - } - } - - /** - * If 'elt' is an array, expands elt and adds items to 'out'. - * Otherwise, adds 'elt' as a single element. - * Used by getLiteralKeys. - */ - void getS2OneLiteralKey(const BSONElement& elt, BSONObjSet* out) { - if (Array == elt.type()) { - getS2LiteralKeysArray(elt.Obj(), out); - } else { - // One thing, not an array, index as-is. - BSONObjBuilder b; - b.appendAs(elt, ""); - out->insert(b.obj()); - } - } - - /** - * elements is a non-geo field. Add the values literally, expanding arrays. - * Used by getS2Keys. 
- */ - void getS2LiteralKeys(const BSONElementSet& elements, BSONObjSet* out) { - if (0 == elements.size()) { - // Missing fields are indexed as null. - BSONObjBuilder b; - b.appendNull(""); - out->insert(b.obj()); - } else { - for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { - getS2OneLiteralKey(*i, out); - } - } - } - -} // namespace - -namespace mongo { - - using std::pair; - using std::string; - using std::vector; - - // static - void ExpressionKeysPrivate::get2DKeys(const BSONObj &obj, - const TwoDIndexingParams& params, - BSONObjSet* keys, - std::vector<BSONObj>* locs) { - BSONElementMSet bSet; - - // Get all the nested location fields, but don't return individual elements from - // the last array, if it exists. - obj.getFieldsDotted(params.geo.c_str(), bSet, false); - - if (bSet.empty()) - return; - - for (BSONElementMSet::iterator setI = bSet.begin(); setI != bSet.end(); ++setI) { - BSONElement geo = *setI; - - if (geo.eoo() || !geo.isABSONObj()) - continue; - - // - // Grammar for location lookup: - // locs ::= [loc,loc,...,loc]|{<k>:loc,<k>:loc,...,<k>:loc}|loc - // loc ::= { <k1> : #, <k2> : # }|[#, #]|{} - // - // Empty locations are ignored, preserving single-location semantics - // - - BSONObj embed = geo.embeddedObject(); - if (embed.isEmpty()) - continue; - - // Differentiate between location arrays and locations - // by seeing if the first element value is a number - bool singleElement = embed.firstElement().isNumber(); - - BSONObjIterator oi(embed); - - while (oi.more()) { - BSONObj locObj; - - if (singleElement) { - locObj = embed; - } else { - BSONElement locElement = oi.next(); - - uassert(16804, mongoutils::str::stream() << - "location object expected, location array not in correct format", - locElement.isABSONObj()); - - locObj = locElement.embeddedObject(); - if(locObj.isEmpty()) - continue; - } - - BSONObjBuilder b(64); - - // Remember the actual location object if needed - if (locs) - locs->push_back(locObj); 
- - // Stop if we don't need to get anything but location objects - if (!keys) { - if (singleElement) break; - else continue; - } - - params.geoHashConverter->hash(locObj, &obj).appendToBuilder(&b, ""); - - // Go through all the other index keys - for (vector<pair<string, int> >::const_iterator i = params.other.begin(); - i != params.other.end(); ++i) { - // Get *all* fields for the index key - BSONElementSet eSet; - obj.getFieldsDotted(i->first, eSet); - - if (eSet.size() == 0) - b.appendNull(""); - else if (eSet.size() == 1) - b.appendAs(*(eSet.begin()), ""); - else { - // If we have more than one key, store as an array of the objects - BSONArrayBuilder aBuilder; - - for (BSONElementSet::iterator ei = eSet.begin(); ei != eSet.end(); - ++ei) { - aBuilder.append(*ei); - } - - b.append("", aBuilder.arr()); - } - } - keys->insert(b.obj()); - if(singleElement) break; - } - } - } - - // static - void ExpressionKeysPrivate::getFTSKeys(const BSONObj &obj, - const fts::FTSSpec& ftsSpec, - BSONObjSet* keys) { - fts::FTSIndexFormat::getKeys(ftsSpec, obj, keys); - } - - // static - void ExpressionKeysPrivate::getHashKeys(const BSONObj& obj, - const string& hashedField, - HashSeed seed, - int hashVersion, - bool isSparse, - BSONObjSet* keys) { - - const char* cstr = hashedField.c_str(); - BSONElement fieldVal = obj.getFieldDottedOrArray(cstr); - uassert(16766, "Error: hashed indexes do not currently support array values", - fieldVal.type() != Array ); - - if (!fieldVal.eoo()) { - BSONObj key = BSON( "" << makeSingleHashKey(fieldVal, seed, hashVersion)); - keys->insert(key); - } - else if (!isSparse) { - BSONObj nullObj = BSON("" << BSONNULL); - keys->insert(BSON("" << makeSingleHashKey(nullObj.firstElement(), seed, hashVersion))); - } - } - - // static - long long int ExpressionKeysPrivate::makeSingleHashKey(const BSONElement& e, - HashSeed seed, - int v) { - massert(16767, "Only HashVersion 0 has been defined" , v == 0 ); - return BSONElementHasher::hash64(e, seed); - } - - 
// static - void ExpressionKeysPrivate::getHaystackKeys(const BSONObj& obj, - const std::string& geoField, - const std::vector<std::string>& otherFields, - double bucketSize, - BSONObjSet* keys) { - - BSONElement loc = obj.getFieldDotted(geoField); - - if (loc.eoo()) { return; } - - uassert(16775, "latlng not an array", loc.isABSONObj()); - string root; - { - BSONObjIterator i(loc.Obj()); - BSONElement x = i.next(); - BSONElement y = i.next(); - root = makeHaystackString(hashHaystackElement(x, bucketSize), - hashHaystackElement(y, bucketSize)); - } - - verify(otherFields.size() == 1); - - BSONElementSet all; - - // This is getFieldsDotted (plural not singular) since the object we're indexing - // may be an array. - obj.getFieldsDotted(otherFields[0], all); - - if (all.size() == 0) { - // We're indexing a document that doesn't have the secondary non-geo field present. - // XXX: do we want to add this even if all.size() > 0? result:empty search terms - // match everything instead of only things w/empty search terms) - addKey(root, BSONElement(), keys); - } else { - // Ex:If our secondary field is type: "foo" or type: {a:"foo", b:"bar"}, - // all.size()==1. We can query on the complete field. - // Ex: If our secondary field is type: ["A", "B"] all.size()==2 and all has values - // "A" and "B". The query looks for any of the fields in the array. 
- for (BSONElementSet::iterator i = all.begin(); i != all.end(); ++i) { - addKey(root, *i, keys); - } - } - } - - // static - int ExpressionKeysPrivate::hashHaystackElement(const BSONElement& e, double bucketSize) { - uassert(16776, "geo field is not a number", e.isNumber()); - double d = e.numberDouble(); - d += 180; - d /= bucketSize; - return static_cast<int>(d); - } - - // static - std::string ExpressionKeysPrivate::makeHaystackString(int hashedX, int hashedY) { - mongoutils::str::stream ss; - ss << hashedX << "_" << hashedY; - return ss; - } - - void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj, - const BSONObj& keyPattern, - const S2IndexingParams& params, - BSONObjSet* keys) { - BSONObjSet keysToAdd; - - // Does one of our documents have a geo field? - bool haveGeoField = false; - - // We output keys in the same order as the fields we index. - BSONObjIterator i(keyPattern); - while (i.more()) { - BSONElement e = i.next(); - - // First, we get the keys that this field adds. Either they're added literally from - // the value of the field, or they're transformed if the field is geo. - BSONElementSet fieldElements; - // false means Don't expand the last array, duh. - obj.getFieldsDotted(e.fieldName(), fieldElements, false); - - BSONObjSet keysForThisField; - if (IndexNames::GEO_2DSPHERE == e.valuestr()) { - if (S2_INDEX_VERSION_2 == params.indexVersion) { - // For V2, - // geo: null, - // geo: undefined - // geo: [] - // should all behave like there is no geo field. So we look for these cases and - // throw out the field elements if we find them. - if (1 == fieldElements.size()) { - BSONElement elt = *fieldElements.begin(); - // Get the :null and :undefined cases. - if (elt.isNull() || Undefined == elt.type()) { - fieldElements.clear(); - } - else if (elt.isABSONObj()) { - // And this is the :[] case. 
- BSONObj obj = elt.Obj(); - if (0 == obj.nFields()) { - fieldElements.clear(); - } - } - } - - // V2 2dsphere indices require that at least one geo field to be present in a - // document in order to index it. - if (fieldElements.size() > 0) { - haveGeoField = true; - } - } - - getS2GeoKeys(obj, fieldElements, params, &keysForThisField); - } else { - getS2LiteralKeys(fieldElements, &keysForThisField); - } - - // We expect there to be the missing field element present in the keys if data is - // missing. So, this should be non-empty. - verify(!keysForThisField.empty()); - - // We take the Cartesian product of all of the keys. This requires that we have - // some keys to take the Cartesian product with. If keysToAdd.empty(), we - // initialize it. - if (keysToAdd.empty()) { - keysToAdd = keysForThisField; - continue; - } - - BSONObjSet updatedKeysToAdd; - for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); - ++it) { - for (BSONObjSet::const_iterator newIt = keysForThisField.begin(); - newIt!= keysForThisField.end(); ++newIt) { - BSONObjBuilder b; - b.appendElements(*it); - b.append(newIt->firstElement()); - updatedKeysToAdd.insert(b.obj()); - } - } - keysToAdd = updatedKeysToAdd; - } - - // Make sure that if we're V2 there's at least one geo field present in the doc. - if (S2_INDEX_VERSION_2 == params.indexVersion) { - if (!haveGeoField) { - return; - } - } - - if (keysToAdd.size() > params.maxKeysPerInsert) { - warning() << "insert of geo object generated lots of keys (" << keysToAdd.size() - << ") consider creating larger buckets. obj=" - << obj; - } - - *keys = keysToAdd; - } - -} // namespace mongo diff --git a/src/mongo/db/index/expression_keys_private.h b/src/mongo/db/index/expression_keys_private.h deleted file mode 100644 index 6e3fb1ea9a0..00000000000 --- a/src/mongo/db/index/expression_keys_private.h +++ /dev/null @@ -1,130 +0,0 @@ -/** - * Copyright (C) 2014 MongoDB Inc. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include <vector> - -#include "mongo/bson/bsonobj.h" -#include "mongo/bson/bsonmisc.h" -#include "mongo/db/hasher.h" - -namespace mongo { - - struct TwoDIndexingParams; - struct S2IndexingParams; - - namespace fts { - - class FTSSpec; - - } // namespace fts - - /** - * Do not use this class or any of its methods directly. The key generation of btree-indexed - * expression indices is kept outside of the access method for testing and for upgrade - * compatibility checking. 
- */ - class ExpressionKeysPrivate { - public: - - // - // 2d - // - - static void get2DKeys(const BSONObj &obj, - const TwoDIndexingParams& params, - BSONObjSet* keys, - std::vector<BSONObj>* locs); - - // - // FTS - // - - static void getFTSKeys(const BSONObj &obj, const fts::FTSSpec& ftsSpec, BSONObjSet* keys); - - // - // Hash - // - - /** - * Generates keys for hash access method. - */ - static void getHashKeys(const BSONObj& obj, - const std::string& hashedField, - HashSeed seed, - int hashVersion, - bool isSparse, - BSONObjSet* keys); - - /** - * Hashing function used by both getHashKeys and the cursors we create. - * Exposed for testing in dbtests/namespacetests.cpp and - * so mongo/db/index_legacy.cpp can use it. - */ - static long long int makeSingleHashKey(const BSONElement& e, HashSeed seed, int v); - - // - // Haystack - // - - /** - * Generates keys for haystack access method. - */ - static void getHaystackKeys(const BSONObj& obj, - const std::string& geoField, - const std::vector<std::string>& otherFields, - double bucketSize, - BSONObjSet* keys); - - /** - * Returns a hash of a BSON element. - * Used by getHaystackKeys and HaystackAccessMethod::searchCommand. - */ - static int hashHaystackElement(const BSONElement& e, double bucketSize); - - /** - * Joins two strings using underscore as separator. - * Used by getHaystackKeys and HaystackAccessMethod::searchCommand. - */ - static std::string makeHaystackString(int hashedX, int hashedY); - - // - // S2 - // - - /** - * Generates keys for S2 access method. 
- */ - static void getS2Keys(const BSONObj& obj, - const BSONObj& keyPattern, - const S2IndexingParams& params, - BSONObjSet* keys); - }; - -} // namespace mongo diff --git a/src/mongo/db/index/expression_params.h b/src/mongo/db/index/expression_params.h index 90d294c5800..845497c90af 100644 --- a/src/mongo/db/index/expression_params.h +++ b/src/mongo/db/index/expression_params.h @@ -27,8 +27,9 @@ */ #include "mongo/db/geo/s2common.h" -#include "mongo/db/index_names.h" +#include "mongo/db/hasher.h" #include "mongo/db/index/2d_common.h" +#include "mongo/db/index_names.h" #include "mongo/db/jsobj.h" #include "mongo/util/mongoutils/str.h" diff --git a/src/mongo/db/index/external_key_generator.cpp b/src/mongo/db/index/external_key_generator.cpp index 1a4a17eba77..89d94ea0bbe 100644 --- a/src/mongo/db/index/external_key_generator.cpp +++ b/src/mongo/db/index/external_key_generator.cpp @@ -28,13 +28,14 @@ #include "mongo/db/index/external_key_generator.h" -#include "mongo/db/fts/fts_index_format.h" -#include "mongo/db/geo/s2common.h" -#include "mongo/db/index_names.h" -#include "mongo/db/index/2d_common.h" +#include "mongo/db/index/2d_key_generator.h" #include "mongo/db/index/btree_key_generator.h" -#include "mongo/db/index/expression_keys_private.h" #include "mongo/db/index/expression_params.h" +#include "mongo/db/index/fts_key_generator.h" +#include "mongo/db/index/hash_key_generator.h" +#include "mongo/db/index/haystack_key_generator.h" +#include "mongo/db/index/s2_key_generator.h" +#include "mongo/db/index_names.h" #include "mongo/db/jsobj.h" namespace mongo { @@ -51,30 +52,34 @@ namespace { if (IndexNames::GEO_2D == type) { TwoDIndexingParams params; ExpressionParams::parseTwoDParams(infoObj, ¶ms); - ExpressionKeysPrivate::get2DKeys(doc, params, keys, NULL); + TwoDKeyGenerator gen( params ); + gen.getKeys( doc, keys ); } else if (IndexNames::GEO_HAYSTACK == type) { string geoField; vector<string> otherFields; double bucketSize; 
ExpressionParams::parseHaystackParams(infoObj, &geoField, &otherFields, &bucketSize); - ExpressionKeysPrivate::getHaystackKeys(doc, geoField, otherFields, bucketSize, keys); + HaystackKeyGenerator gen( geoField, otherFields, bucketSize ); + gen.getKeys( doc, keys ); } else if (IndexNames::GEO_2DSPHERE == type) { S2IndexingParams params; ExpressionParams::parse2dsphereParams(infoObj, ¶ms); - ExpressionKeysPrivate::getS2Keys(doc, keyPattern, params, keys); + S2KeyGenerator gen( keyPattern, params ); + gen.getKeys( doc, keys ); } else if (IndexNames::TEXT == type) { - fts::FTSSpec spec(infoObj); - ExpressionKeysPrivate::getFTSKeys(doc, spec, keys); + FTSKeyGenerator gen(infoObj); + gen.getKeys( doc, keys ); } else if (IndexNames::HASHED == type) { HashSeed seed; int version; string field; ExpressionParams::parseHashParams(infoObj, &seed, &version, &field); - ExpressionKeysPrivate::getHashKeys(doc, field, seed, version, infoObj["sparse"].trueValue(), keys); + HashKeyGenerator gen( field, seed, version, infoObj["sparse"].trueValue() ); + gen.getKeys( doc, keys ); } else { invariant(IndexNames::BTREE == type); diff --git a/src/mongo/db/index/fts_access_method.cpp b/src/mongo/db/index/fts_access_method.cpp index c803867b812..b2cadaeb6f9 100644 --- a/src/mongo/db/index/fts_access_method.cpp +++ b/src/mongo/db/index/fts_access_method.cpp @@ -26,16 +26,20 @@ * it in the license file. 
*/ +#include "mongo/db/fts/fts_index_format.h" #include "mongo/db/index/fts_access_method.h" -#include "mongo/db/index/expression_keys_private.h" +#include "mongo/db/index/fts_key_generator.h" namespace mongo { FTSAccessMethod::FTSAccessMethod(IndexCatalogEntry* btreeState) - : BtreeBasedAccessMethod(btreeState), _ftsSpec(btreeState->descriptor()->infoObj()) { } + : BtreeBasedAccessMethod(btreeState), + _ftsSpec(btreeState->descriptor()->infoObj()), + _keyGenerator( new FTSKeyGenerator(btreeState->descriptor()->infoObj() ) ) { + } void FTSAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { - ExpressionKeysPrivate::getFTSKeys(obj, _ftsSpec, keys); + _keyGenerator->getKeys( obj, keys ); } } // namespace mongo diff --git a/src/mongo/db/index/fts_access_method.h b/src/mongo/db/index/fts_access_method.h index ba10b4e2b67..21f66bde2d3 100644 --- a/src/mongo/db/index/fts_access_method.h +++ b/src/mongo/db/index/fts_access_method.h @@ -43,11 +43,13 @@ namespace mongo { const fts::FTSSpec& getSpec() const { return _ftsSpec; } + virtual shared_ptr<KeyGenerator> getKeyGenerator() const { return _keyGenerator; } private: // Implemented: virtual void getKeys(const BSONObj& obj, BSONObjSet* keys); fts::FTSSpec _ftsSpec; + shared_ptr<KeyGenerator> _keyGenerator; }; } // namespace mongo diff --git a/src/mongo/db/index/fts_key_generator.cpp b/src/mongo/db/index/fts_key_generator.cpp new file mode 100644 index 00000000000..cbbae831bb6 --- /dev/null +++ b/src/mongo/db/index/fts_key_generator.cpp @@ -0,0 +1,42 @@ +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. +*/ + +#include "mongo/db/fts/fts_index_format.h" +#include "mongo/db/index/fts_key_generator.h" + +namespace mongo { + + FTSKeyGenerator::FTSKeyGenerator( const BSONObj& infoObj ) + : _ftsSpec( infoObj ) { + } + + void FTSKeyGenerator::getKeys(const BSONObj& obj, BSONObjSet* keys) const { + return fts::FTSIndexFormat::getKeys( _ftsSpec, obj, keys ); + } + +} // namespace mongo diff --git a/src/mongo/db/index/fts_key_generator.h b/src/mongo/db/index/fts_key_generator.h new file mode 100644 index 00000000000..86c66d916fb --- /dev/null +++ b/src/mongo/db/index/fts_key_generator.h @@ -0,0 +1,50 @@ +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. 
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. 
+*/ + +#pragma once + +#include "mongo/base/status.h" +#include "mongo/db/fts/fts_spec.h" +#include "mongo/db/index/btree_based_access_method.h" +#include "mongo/db/index/index_descriptor.h" +#include "mongo/db/jsobj.h" + +namespace mongo { + + class FTSKeyGenerator : public KeyGenerator { + public: + FTSKeyGenerator( const BSONObj& infoObj ); + virtual ~FTSKeyGenerator(){} + + virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const; + + private: + fts::FTSSpec _ftsSpec; + }; + +} // namespace mongo diff --git a/src/mongo/db/index/hash_access_method.cpp b/src/mongo/db/index/hash_access_method.cpp index 34729a1cf77..b9483017191 100644 --- a/src/mongo/db/index/hash_access_method.cpp +++ b/src/mongo/db/index/hash_access_method.cpp @@ -28,9 +28,9 @@ #include "mongo/db/structure/btree/btree.h" #include "mongo/db/hasher.h" -#include "mongo/db/index/expression_keys_private.h" #include "mongo/db/index/expression_params.h" #include "mongo/db/index/hash_access_method.h" +#include "mongo/db/index/hash_key_generator.h" namespace mongo { @@ -50,10 +50,15 @@ namespace mongo { &_seed, &_hashVersion, &_hashedField); + + _keyGenerator.reset( new HashKeyGenerator( _hashedField, + _seed, + _hashVersion, + _descriptor->isSparse() ) ); } void HashAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { - ExpressionKeysPrivate::getHashKeys(obj, _hashedField, _seed, _hashVersion, _descriptor->isSparse(), keys); + _keyGenerator->getKeys( obj, keys ); } } // namespace mongo diff --git a/src/mongo/db/index/hash_access_method.h b/src/mongo/db/index/hash_access_method.h index d0f46620334..c68b7ec39dd 100644 --- a/src/mongo/db/index/hash_access_method.h +++ b/src/mongo/db/index/hash_access_method.h @@ -53,6 +53,7 @@ namespace mongo { return Status::OK(); } + virtual shared_ptr<KeyGenerator> getKeyGenerator() const { return _keyGenerator; } private: virtual void getKeys(const BSONObj& obj, BSONObjSet* keys); @@ -66,6 +67,8 @@ namespace mongo { int _hashVersion; BSONObj 
_missingKey; + + shared_ptr<KeyGenerator> _keyGenerator; }; } // namespace mongo diff --git a/src/mongo/db/index/hash_key_generator.cpp b/src/mongo/db/index/hash_key_generator.cpp new file mode 100644 index 00000000000..2e891701fba --- /dev/null +++ b/src/mongo/db/index/hash_key_generator.cpp @@ -0,0 +1,70 @@ +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. 
+*/ + +#include "mongo/db/structure/btree/btree.h" +#include "mongo/db/hasher.h" +#include "mongo/db/index/expression_params.h" +#include "mongo/db/index/hash_access_method.h" +#include "mongo/db/index/hash_key_generator.h" + +namespace mongo { + + HashKeyGenerator::HashKeyGenerator( const std::string& hashedField, + HashSeed seed, + int hashVersion, + bool isSparse ) + : _hashedField( hashedField ), + _seed( seed ), + _hashVersion( hashVersion ), + _isSparse( isSparse ) { + } + + void HashKeyGenerator::getKeys( const BSONObj& obj, BSONObjSet* keys ) const { + const char* cstr = _hashedField.c_str(); + BSONElement fieldVal = obj.getFieldDottedOrArray(cstr); + uassert(16766, "Error: hashed indexes do not currently support array values", + fieldVal.type() != Array ); + + if (!fieldVal.eoo()) { + BSONObj key = BSON( "" << makeSingleHashKey(fieldVal, _seed, _hashVersion)); + keys->insert(key); + } + else if (!_isSparse) { + BSONObj nullObj = BSON("" << BSONNULL); + keys->insert(BSON("" << makeSingleHashKey(nullObj.firstElement(), _seed, _hashVersion))); + } + + } + + long long int HashKeyGenerator::makeSingleHashKey(const BSONElement& e, HashSeed seed, int v) { + massert(16767, "Only HashVersion 0 has been defined" , v == 0 ); + return BSONElementHasher::hash64(e, seed); + } + + +} // namespace mongo diff --git a/src/mongo/db/index/hash_key_generator.h b/src/mongo/db/index/hash_key_generator.h new file mode 100644 index 00000000000..dd8f851be4f --- /dev/null +++ b/src/mongo/db/index/hash_key_generator.h @@ -0,0 +1,60 @@ +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. +*/ + +#pragma once + +#include <string> + +#include "mongo/base/status.h" +#include "mongo/db/hasher.h" // For HashSeed. 
+#include "mongo/db/index/index_descriptor.h" +#include "mongo/db/index/key_generator.h" +#include "mongo/db/jsobj.h" + +namespace mongo { + + class HashKeyGenerator : public KeyGenerator { + public: + HashKeyGenerator( const std::string& hashedField, + HashSeed seed, + int hashVersion, + bool isSparse ); + virtual ~HashKeyGenerator() {} + + virtual void getKeys( const BSONObj& obj, BSONObjSet* keys ) const; + + static long long int makeSingleHashKey(const BSONElement& e, HashSeed seed, int v); + + private: + std::string _hashedField; + HashSeed _seed; + int _hashVersion; + bool _isSparse; + }; + +} // namespace mongo diff --git a/src/mongo/db/index/haystack_access_method.cpp b/src/mongo/db/index/haystack_access_method.cpp index 76250d6797e..31441d1f114 100644 --- a/src/mongo/db/index/haystack_access_method.cpp +++ b/src/mongo/db/index/haystack_access_method.cpp @@ -30,9 +30,9 @@ #include "mongo/base/status.h" #include "mongo/db/geo/hash.h" -#include "mongo/db/index/expression_keys_private.h" #include "mongo/db/index/expression_params.h" #include "mongo/db/index/haystack_access_method_internal.h" +#include "mongo/db/index/haystack_key_generator.h" #include "mongo/db/jsobj.h" #include "mongo/db/pdfile.h" #include "mongo/db/query/internal_plans.h" @@ -51,10 +51,12 @@ namespace mongo { uassert(16773, "no geo field specified", _geoField.size()); uassert(16774, "no non-geo fields specified", _otherFields.size()); + + _keyGenerator.reset( new HaystackKeyGenerator( _geoField, _otherFields, _bucketSize ) ); } void HaystackAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { - ExpressionKeysPrivate::getHaystackKeys(obj, _geoField, _otherFields, _bucketSize, keys); + _keyGenerator->getKeys( obj, keys ); } void HaystackAccessMethod::searchCommand(const BSONObj& nearObj, double maxDistance, @@ -67,8 +69,8 @@ namespace mongo { int x, y; { BSONObjIterator i(nearObj); - x = ExpressionKeysPrivate::hashHaystackElement(i.next(), _bucketSize); - y = 
ExpressionKeysPrivate::hashHaystackElement(i.next(), _bucketSize); + x = HaystackKeyGenerator::hashHaystackElement(i.next(), _bucketSize); + y = HaystackKeyGenerator::hashHaystackElement(i.next(), _bucketSize); } int scale = static_cast<int>(ceil(maxDistance / _bucketSize)); @@ -79,7 +81,7 @@ namespace mongo { for (int a = -scale; a <= scale && !hopper.limitReached(); ++a) { for (int b = -scale; b <= scale && !hopper.limitReached(); ++b) { BSONObjBuilder bb; - bb.append("", ExpressionKeysPrivate::makeHaystackString(x + a, y + b)); + bb.append("", HaystackKeyGenerator::makeHaystackString(x + a, y + b)); for (unsigned i = 0; i < _otherFields.size(); i++) { // See if the non-geo field we're indexing on is in the provided search term. diff --git a/src/mongo/db/index/haystack_access_method.h b/src/mongo/db/index/haystack_access_method.h index 29b6485f7d2..dc377083f96 100644 --- a/src/mongo/db/index/haystack_access_method.h +++ b/src/mongo/db/index/haystack_access_method.h @@ -58,6 +58,7 @@ namespace mongo { HaystackAccessMethod(IndexCatalogEntry* btreeState); virtual ~HaystackAccessMethod() { } + virtual shared_ptr<KeyGenerator> getKeyGenerator() const { return _keyGenerator; } protected: friend class GeoHaystackSearchCommand; void searchCommand(const BSONObj& nearObj, double maxDistance, const BSONObj& search, @@ -69,6 +70,8 @@ namespace mongo { string _geoField; vector<string> _otherFields; double _bucketSize; + + shared_ptr<KeyGenerator> _keyGenerator; }; } // namespace mongo diff --git a/src/mongo/db/index/haystack_key_generator.cpp b/src/mongo/db/index/haystack_key_generator.cpp new file mode 100644 index 00000000000..f49f0e33f79 --- /dev/null +++ b/src/mongo/db/index/haystack_key_generator.cpp @@ -0,0 +1,128 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/index/haystack_access_method.h" + +#include "mongo/base/status.h" +#include "mongo/db/geo/hash.h" +#include "mongo/db/index/expression_params.h" +#include "mongo/db/index/haystack_access_method_internal.h" +#include "mongo/db/index/haystack_key_generator.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/pdfile.h" +#include "mongo/db/query/internal_plans.h" + +namespace mongo { + + // ------------------- + + HaystackKeyGenerator::HaystackKeyGenerator( const std::string& geoField, + const std::vector<std::string>& otherFields, + double bucketSize ) + : _geoField( geoField ), + _otherFields( otherFields ), + _bucketSize( bucketSize ) { + } + + namespace { + /** + * Build a new BSONObj with root in it. If e is non-empty, append that to the key. 
+ * Insert the BSONObj into keys. + * Used by getHaystackKeys. + */ + void addKey(const string& root, const BSONElement& e, BSONObjSet* keys) { + BSONObjBuilder buf; + buf.append("", root); + + if (e.eoo()) + buf.appendNull(""); + else + buf.appendAs(e, ""); + + keys->insert(buf.obj()); + } + + } + + void HaystackKeyGenerator::getKeys( const BSONObj& obj, BSONObjSet* keys) const { + + BSONElement loc = obj.getFieldDotted(_geoField); + + if (loc.eoo()) { return; } + + uassert(16775, "latlng not an array", loc.isABSONObj()); + string root; + { + BSONObjIterator i(loc.Obj()); + BSONElement x = i.next(); + BSONElement y = i.next(); + root = makeHaystackString(hashHaystackElement(x, _bucketSize), + hashHaystackElement(y, _bucketSize)); + } + + verify(_otherFields.size() == 1); + + BSONElementSet all; + + // This is getFieldsDotted (plural not singular) since the object we're indexing + // may be an array. + obj.getFieldsDotted(_otherFields[0], all); + + if (all.size() == 0) { + // We're indexing a document that doesn't have the secondary non-geo field present. + // XXX: do we want to add this even if all.size() > 0? result:empty search terms + // match everything instead of only things w/empty search terms) + addKey(root, BSONElement(), keys); + } else { + // Ex:If our secondary field is type: "foo" or type: {a:"foo", b:"bar"}, + // all.size()==1. We can query on the complete field. + // Ex: If our secondary field is type: ["A", "B"] all.size()==2 and all has values + // "A" and "B". The query looks for any of the fields in the array. 
+ for (BSONElementSet::iterator i = all.begin(); i != all.end(); ++i) { + addKey(root, *i, keys); + } + } + } + + // static + int HaystackKeyGenerator::hashHaystackElement(const BSONElement& e, double bucketSize) { + uassert(16776, "geo field is not a number", e.isNumber()); + double d = e.numberDouble(); + d += 180; + d /= bucketSize; + return static_cast<int>(d); + } + + // static + std::string HaystackKeyGenerator::makeHaystackString(int hashedX, int hashedY) { + mongoutils::str::stream ss; + ss << hashedX << "_" << hashedY; + return ss; + } + +} // namespace mongo diff --git a/src/mongo/db/index/haystack_key_generator.h b/src/mongo/db/index/haystack_key_generator.h new file mode 100644 index 00000000000..c540e161346 --- /dev/null +++ b/src/mongo/db/index/haystack_key_generator.h @@ -0,0 +1,65 @@ +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. 
If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. +*/ + +#pragma once + +#include "mongo/base/status.h" +#include "mongo/db/index/btree_based_access_method.h" +#include "mongo/db/index/index_descriptor.h" +#include "mongo/db/jsobj.h" + +namespace mongo { + + class HaystackKeyGenerator : public KeyGenerator { + public: + HaystackKeyGenerator( const std::string& geoField, + const std::vector<std::string>& otherFields, + double bucketSize ); + virtual ~HaystackKeyGenerator() {} + + virtual void getKeys( const BSONObj& obj, BSONObjSet* keys ) const; + + /** + * Returns a hash of a BSON element. + * Used by getHaystackKeys and HaystackAccessMethod::searchCommand. + */ + static int hashHaystackElement(const BSONElement& e, double bucketSize); + + /** + * Joins two strings using underscore as separator. + * Used by getHaystackKeys and HaystackAccessMethod::searchCommand. 
+ */ + static std::string makeHaystackString(int hashedX, int hashedY); + + private: + std::string _geoField; + std::vector<std::string> _otherFields; + double _bucketSize; + }; + +} // namespace mongo diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h index 4f2c2e8650f..ac587f7fdb7 100644 --- a/src/mongo/db/index/index_access_method.h +++ b/src/mongo/db/index/index_access_method.h @@ -31,12 +31,16 @@ #include "mongo/db/diskloc.h" #include "mongo/db/index/index_cursor.h" #include "mongo/db/index/index_descriptor.h" +#include "mongo/db/index/key_generator.h" #include "mongo/db/jsobj.h" namespace mongo { class UpdateTicket; + class InsertTicket; + struct InsertDeleteOptions; + struct PregeneratedKeysOnIndex; /** * An IndexAccessMethod is the interface through which all the mutation, lookup, and @@ -61,15 +65,20 @@ namespace mongo { * 'loc') into the index. 'obj' is the object at the location 'loc'. If not NULL, * 'numInserted' will be set to the number of keys added to the index for the document. If * there is more than one key for 'obj', either all keys will be inserted or none will. - * - * The behavior of the insertion can be specified through 'options'. + * + * The behavior of the insertion can be specified through 'options'. + * + * prepared: if you generated keys before, you can pass the generator you used + * and the keys you got. If the generator matches, the keys are used. Otherwise we + * generate our own keys and you do not have to do anything. */ virtual Status insert(const BSONObj& obj, const DiskLoc& loc, const InsertDeleteOptions& options, - int64_t* numInserted) = 0; + int64_t* numInserted, + const PregeneratedKeysOnIndex* prepared = NULL ) = 0; - /** + /** * Analogous to above, but remove the records instead of inserting them. If not NULL, * numDeleted will be set to the number of keys removed from the index for the document. 
*/ @@ -129,6 +138,14 @@ namespace mongo { virtual Status touch(const BSONObj& obj) = 0; /** + * Try to page-in the pages that contain the keys. + * This can be used to speed up future accesses to an index by trying to ensure the + * appropriate pages are not swapped out. + * See prefetch.cpp. + */ + virtual Status touch(const BSONObjSet& keys) = 0; + + /** * Walk the entire index, checking the internal structure for consistency. * Set numKeys to the number of keys in the index. * @@ -169,6 +186,12 @@ namespace mongo { virtual Status commitBulk( IndexAccessMethod* bulk, bool mayInterrupt, std::set<DiskLoc>* dups ) = 0; + + /** + * this returns a shared_ptr so that someone can get all the generators in a lock, + * then unlock, generate keys, and then re-lock and use those keys + */ + virtual shared_ptr<KeyGenerator> getKeyGenerator() const = 0; }; /** diff --git a/src/mongo/db/index/key_generator.cpp b/src/mongo/db/index/key_generator.cpp new file mode 100644 index 00000000000..009aef8a931 --- /dev/null +++ b/src/mongo/db/index/key_generator.cpp @@ -0,0 +1,40 @@ +/** + * Copyright (C) 2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/index/key_generator.h" + +namespace mongo { + namespace { + AtomicUInt64 NEXT; + } + + KeyGenerator::KeyGenerator() { + _id = NEXT.fetchAndAdd(1); + } + +} diff --git a/src/mongo/db/index/key_generator.h b/src/mongo/db/index/key_generator.h new file mode 100644 index 00000000000..0cfbd3df5ef --- /dev/null +++ b/src/mongo/db/index/key_generator.h @@ -0,0 +1,48 @@ +/** + * Copyright (C) 2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/bsonmisc.h" + +namespace mongo { + + class KeyGenerator { + public: + KeyGenerator(); + virtual ~KeyGenerator() {} + + virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const = 0; + + int64_t getId() const { return _id; } + private: + int64_t _id; + }; + +} // namespace mongo diff --git a/src/mongo/db/index/s2_access_method.cpp b/src/mongo/db/index/s2_access_method.cpp index 9cbfd2d418b..50df4f34711 100644 --- a/src/mongo/db/index/s2_access_method.cpp +++ b/src/mongo/db/index/s2_access_method.cpp @@ -31,14 +31,19 @@ #include <vector> #include "mongo/base/status.h" -#include "mongo/db/geo/geoparser.h" #include "mongo/db/geo/geoconstants.h" +#include "mongo/db/geo/geoparser.h" +#include "mongo/db/geo/geoquery.h" +#include "mongo/db/geo/s2.h" #include "mongo/db/geo/s2common.h" -#include "mongo/db/index_names.h" -#include "mongo/db/index/expression_keys_private.h" #include "mongo/db/index/expression_params.h" +#include "mongo/db/index/s2_key_generator.h" +#include "mongo/db/index_names.h" #include "mongo/db/jsobj.h" +#include "third_party/s2/s2cell.h" +#include "third_party/s2/s2regioncoverer.h" 
+ namespace mongo { static const string kIndexVersionFieldName("2dsphereIndexVersion"); @@ -51,6 +56,8 @@ namespace mongo { ExpressionParams::parse2dsphereParams(descriptor->infoObj(), &_params); + _keyGenerator.reset( new S2KeyGenerator( descriptor->keyPattern(), _params ) ); + int geoFields = 0; // Categorize the fields we're indexing and make sure we have a geo field. @@ -101,7 +108,7 @@ namespace mongo { } void S2AccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { - ExpressionKeysPrivate::getS2Keys(obj, _descriptor->keyPattern(), _params, keys); + return _keyGenerator->getKeys( obj, keys ); } } // namespace mongo diff --git a/src/mongo/db/index/s2_access_method.h b/src/mongo/db/index/s2_access_method.h index df12026395b..efc79ef9df4 100644 --- a/src/mongo/db/index/s2_access_method.h +++ b/src/mongo/db/index/s2_access_method.h @@ -54,10 +54,12 @@ namespace mongo { */ static BSONObj fixSpec(const BSONObj& specObj); + virtual shared_ptr<KeyGenerator> getKeyGenerator() const { return _keyGenerator; } private: virtual void getKeys(const BSONObj& obj, BSONObjSet* keys); S2IndexingParams _params; + shared_ptr<KeyGenerator> _keyGenerator; }; } // namespace mongo diff --git a/src/mongo/db/index/s2_key_generator.cpp b/src/mongo/db/index/s2_key_generator.cpp new file mode 100644 index 00000000000..2b925964bd8 --- /dev/null +++ b/src/mongo/db/index/s2_key_generator.cpp @@ -0,0 +1,272 @@ +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. 
+* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. +*/ + +#include "mongo/db/index/s2_access_method.h" + +#include <vector> + +#include "mongo/base/status.h" +#include "mongo/db/geo/geoconstants.h" +#include "mongo/db/geo/geoparser.h" +#include "mongo/db/geo/geoquery.h" +#include "mongo/db/geo/s2.h" +#include "mongo/db/geo/s2common.h" +#include "mongo/db/index/expression_params.h" +#include "mongo/db/index/s2_key_generator.h" +#include "mongo/db/index_names.h" +#include "mongo/db/jsobj.h" + +#include "third_party/s2/s2cell.h" +#include "third_party/s2/s2regioncoverer.h" + +namespace mongo { + + S2KeyGenerator::S2KeyGenerator( const BSONObj& keyPattern, const S2IndexingParams& params ) + : _keyPattern( keyPattern.getOwned() ), _params( params ) { + } + + // + // Helper functions for getS2Keys + // + + static void S2KeysFromRegion(S2RegionCoverer *coverer, const S2Region &region, + vector<string> *out) { + vector<S2CellId> covering; + coverer->GetCovering(region, &covering); + for (size_t i = 0; i < covering.size(); ++i) { + out->push_back(covering[i].toString()); + } + } + + bool 
S2GetKeysForObject(const BSONObj& obj, + const S2IndexingParams& params, + vector<string>* out) { + S2RegionCoverer coverer; + params.configureCoverer(&coverer); + + GeometryContainer geoContainer; + if (!geoContainer.parseFrom(obj)) { return false; } + + // Only certain geometries can be indexed in the old index format S2_INDEX_VERSION_1. See + // definition of S2IndexVersion for details. + if (params.indexVersion == S2_INDEX_VERSION_1 && !geoContainer.isSimpleContainer()) { + return false; + } + + if (!geoContainer.hasS2Region()) { return false; } + + S2KeysFromRegion(&coverer, geoContainer.getRegion(), out); + + return true; + } + + /** + * Get the index keys for elements that are GeoJSON. + * Used by getS2Keys. + */ + void getS2GeoKeys(const BSONObj& document, const BSONElementSet& elements, + const S2IndexingParams& params, + BSONObjSet* out) { + for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { + uassert(16754, "Can't parse geometry from element: " + i->toString(), + i->isABSONObj()); + const BSONObj &geoObj = i->Obj(); + + vector<string> cells; + bool succeeded = S2GetKeysForObject(geoObj, params, &cells); + uassert(16755, "Can't extract geo keys from object, malformed geometry?: " + + document.toString(), succeeded); + + uassert(16756, "Unable to generate keys for (likely malformed) geometry: " + + document.toString(), + cells.size() > 0); + + for (vector<string>::const_iterator it = cells.begin(); it != cells.end(); ++it) { + BSONObjBuilder b; + b.append("", *it); + out->insert(b.obj()); + } + } + + if (0 == out->size()) { + BSONObjBuilder b; + b.appendNull(""); + out->insert(b.obj()); + } + } + + /** + * Expands array and appends items to 'out'. + * Used by getOneLiteralKey. + */ + void getS2LiteralKeysArray(const BSONObj& obj, BSONObjSet* out) { + BSONObjIterator objIt(obj); + if (!objIt.more()) { + // Empty arrays are indexed as undefined. 
+ BSONObjBuilder b; + b.appendUndefined(""); + out->insert(b.obj()); + } else { + // Non-empty arrays are exploded. + while (objIt.more()) { + BSONObjBuilder b; + b.appendAs(objIt.next(), ""); + out->insert(b.obj()); + } + } + } + + /** + * If 'elt' is an array, expands elt and adds items to 'out'. + * Otherwise, adds 'elt' as a single element. + * Used by getLiteralKeys. + */ + void getS2OneLiteralKey(const BSONElement& elt, BSONObjSet* out) { + if (Array == elt.type()) { + getS2LiteralKeysArray(elt.Obj(), out); + } else { + // One thing, not an array, index as-is. + BSONObjBuilder b; + b.appendAs(elt, ""); + out->insert(b.obj()); + } + } + + /** + * elements is a non-geo field. Add the values literally, expanding arrays. + * Used by getS2Keys. + */ + void getS2LiteralKeys(const BSONElementSet& elements, BSONObjSet* out) { + if (0 == elements.size()) { + // Missing fields are indexed as null. + BSONObjBuilder b; + b.appendNull(""); + out->insert(b.obj()); + } else { + for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { + getS2OneLiteralKey(*i, out); + } + } + } + + void S2KeyGenerator::getKeys(const BSONObj& obj, BSONObjSet* keys) const { + BSONObjSet keysToAdd; + + // Does one of our documents have a geo field? + bool haveGeoField = false; + + // We output keys in the same order as the fields we index. + BSONObjIterator i(_keyPattern); + while (i.more()) { + BSONElement e = i.next(); + + // First, we get the keys that this field adds. Either they're added literally from + // the value of the field, or they're transformed if the field is geo. + BSONElementSet fieldElements; + // false means Don't expand the last array, duh. + obj.getFieldsDotted(e.fieldName(), fieldElements, false); + + BSONObjSet keysForThisField; + if (IndexNames::GEO_2DSPHERE == e.valuestr()) { + if (S2_INDEX_VERSION_2 == _params.indexVersion) { + // For V2, + // geo: null, + // geo: undefined + // geo: [] + // should all behave like there is no geo field. 
So we look for these cases + // and throw out the field elements if we find them. + if (1 == fieldElements.size()) { + BSONElement elt = *fieldElements.begin(); + // Get the :null and :undefined cases. + if (elt.isNull() || Undefined == elt.type()) { + fieldElements.clear(); + } + else if (elt.isABSONObj()) { + // And this is the :[] case. + BSONObj obj = elt.Obj(); + if (0 == obj.nFields()) { + fieldElements.clear(); + } + } + } + + // V2 2dsphere indices require that at least one geo field to be present in a + // document in order to index it. + if (fieldElements.size() > 0) { + haveGeoField = true; + } + } + + getS2GeoKeys(obj, fieldElements, _params, &keysForThisField); + } else { + getS2LiteralKeys(fieldElements, &keysForThisField); + } + + // We expect there to be the missing field element present in the keys if data is + // missing. So, this should be non-empty. + verify(!keysForThisField.empty()); + + // We take the Cartesian product of all of the keys. This requires that we have + // some keys to take the Cartesian product with. If keysToAdd.empty(), we + // initialize it. + if (keysToAdd.empty()) { + keysToAdd = keysForThisField; + continue; + } + + BSONObjSet updatedKeysToAdd; + for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); + ++it) { + for (BSONObjSet::const_iterator newIt = keysForThisField.begin(); + newIt!= keysForThisField.end(); ++newIt) { + BSONObjBuilder b; + b.appendElements(*it); + b.append(newIt->firstElement()); + updatedKeysToAdd.insert(b.obj()); + } + } + keysToAdd = updatedKeysToAdd; + } + + // Make sure that if we're V2 there's at least one geo field present in the doc. + if (S2_INDEX_VERSION_2 == _params.indexVersion) { + if (!haveGeoField) { + return; + } + } + + if (keysToAdd.size() > _params.maxKeysPerInsert) { + warning() << "insert of geo object generated lots of keys (" << keysToAdd.size() + << ") consider creating larger buckets. 
obj=" + << obj; + } + + *keys = keysToAdd; + } + +} // namespace mongo diff --git a/src/mongo/db/index/s2_key_generator.h b/src/mongo/db/index/s2_key_generator.h new file mode 100644 index 00000000000..2b96ea8187b --- /dev/null +++ b/src/mongo/db/index/s2_key_generator.h @@ -0,0 +1,56 @@ +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. 
+*/ + +#pragma once + +#include "mongo/base/status.h" +#include "mongo/db/geo/s2common.h" +#include "mongo/db/index/btree_based_access_method.h" +#include "mongo/db/index/index_descriptor.h" +#include "mongo/db/jsobj.h" + +namespace mongo { + + class IndexCursor; + struct S2IndexingParams; + + class S2KeyGenerator : public KeyGenerator { + public: + S2KeyGenerator( const BSONObj& keyPattern, const S2IndexingParams& params ); + + virtual ~S2KeyGenerator(){} + + virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const; + + private: + BSONObj _keyPattern; + S2IndexingParams _params; + + }; + +} // namespace mongo diff --git a/src/mongo/db/index_legacy.cpp b/src/mongo/db/index_legacy.cpp index 8d63ce1cbe3..3199c5958da 100644 --- a/src/mongo/db/index_legacy.cpp +++ b/src/mongo/db/index_legacy.cpp @@ -30,7 +30,7 @@ #include "mongo/db/client.h" #include "mongo/db/fts/fts_spec.h" -#include "mongo/db/index/expression_keys_private.h" +#include "mongo/db/index/hash_key_generator.h" #include "mongo/db/index/s2_access_method.h" #include "mongo/db/index_names.h" #include "mongo/db/jsobj.h" @@ -72,7 +72,7 @@ namespace mongo { // alter the data format). Additionally, in certain places the hashed index code and // the index bound calculation code assume null and missing are indexed identically. 
BSONObj nullObj = BSON("" << BSONNULL); - return BSON("" << ExpressionKeysPrivate::makeSingleHashKey(nullObj.firstElement(), seed, hashVersion)); + return BSON("" << HashKeyGenerator::makeSingleHashKey(nullObj.firstElement(), seed, hashVersion)); } else { BSONObjBuilder b; diff --git a/src/mongo/db/instance.cpp b/src/mongo/db/instance.cpp index 58cf5fa55e0..4c1ad157ee9 100644 --- a/src/mongo/db/instance.cpp +++ b/src/mongo/db/instance.cpp @@ -801,7 +801,8 @@ namespace mongo { return ok; } - void checkAndInsert(Client::Context& ctx, const char *ns, /*modifies*/BSONObj& js) { + void checkAndInsert(Client::Context& ctx, const char *ns, /*modifies*/BSONObj& js, + PregeneratedKeys* preGen ) { if ( nsToCollectionSubstring( ns ) == "system.indexes" ) { string targetNS = js["ns"].String(); uassertStatusOK( userAllowedWriteNS( targetNS ) ); @@ -841,7 +842,7 @@ namespace mongo { verify( collection ); } - StatusWith<DiskLoc> status = collection->insertDocument( js, true ); + StatusWith<DiskLoc> status = collection->insertDocument( js, true, preGen ); uassertStatusOK( status.getStatus() ); logOp("i", ns, js); } @@ -850,7 +851,7 @@ namespace mongo { size_t i; for (i=0; i<objs.size(); i++){ try { - checkAndInsert(ctx, ns, objs[i]); + checkAndInsert(ctx, ns, objs[i], NULL); getDur().commitIfNeeded(); } catch (const UserException&) { if (!keepGoing || i == objs.size()-1){ @@ -890,25 +891,45 @@ namespace mongo { uassertStatusOK(status); } + PregeneratedKeys tempHack; + if ( multi.size() == 1 ) { + StatusWith<BSONObj> fixed = fixDocumentForInsert( multi[0] ); + uassertStatusOK( fixed.getStatus() ); + if ( !fixed.getValue().isEmpty() ) + multi[0] = fixed.getValue(); + + bool hasEntry = GeneratorHolder::getInstance()->prepare( ns, multi[0], &tempHack ); + if ( !hasEntry ) { + // lets load! 
+ Client::ReadContext ctx(ns); + Database* db = ctx.ctx().db(); + Collection * c = db->getCollection( ns ); + if ( c ) { + GeneratorHolder::getInstance()->reset( c ); + } + } + } + PageFaultRetryableSection s; while ( true ) { try { Lock::DBWrite lk(ns); - + // CONCURRENCY TODO: is being read locked in big log sufficient here? // writelock is used to synchronize stepdowns w/ writes uassert( 10058 , "not master", isMasterNs(ns) ); - + if ( handlePossibleShardedMessage( m , 0 ) ) return; - + Client::Context ctx(ns); - + if (multi.size() > 1) { const bool keepGoing = d.reservedField() & InsertOption_ContinueOnError; insertMulti(ctx, keepGoing, ns, multi, op); - } else { - checkAndInsert(ctx, ns, multi[0]); + } + else { + checkAndInsert(ctx, ns, multi[0], &tempHack); globalOpCounters.incInsertInWriteLock(1); op.debug().ninserted = 1; } diff --git a/src/mongo/db/pdfile.cpp b/src/mongo/db/pdfile.cpp index 1719706451c..82567489193 100644 --- a/src/mongo/db/pdfile.cpp +++ b/src/mongo/db/pdfile.cpp @@ -188,6 +188,8 @@ namespace mongo { audit::logDropDatabase( currentClient.get(), db ); + GeneratorHolder::getInstance()->droppedDatabase( db ); + // Not sure we need this here, so removed. If we do, we need to move it down // within other calls both (1) as they could be called from elsewhere and // (2) to keep the lock order right - groupcommitmutex must be locked before diff --git a/src/mongo/dbtests/namespacetests.cpp b/src/mongo/dbtests/namespacetests.cpp index 4a6a3311fc8..90af08a8637 100644 --- a/src/mongo/dbtests/namespacetests.cpp +++ b/src/mongo/dbtests/namespacetests.cpp @@ -34,7 +34,7 @@ #include "mongo/db/db.h" #include "mongo/db/index/btree_key_generator.h" -#include "mongo/db/index/expression_keys_private.h" +#include "mongo/db/index/hash_key_generator.h" #include "mongo/db/index_legacy.h" #include "mongo/db/index_names.h" #include "mongo/db/json.h" @@ -987,10 +987,11 @@ namespace NamespaceTests { // Call getKeys on the nullObj. 
BSONObjSet nullFieldKeySet; - ExpressionKeysPrivate::getHashKeys(nullObj, "a", 0, 0, false, &nullFieldKeySet); + HashKeyGenerator gen( "a", 0, 0, false ); + gen.getKeys( nullObj, &nullFieldKeySet); BSONElement nullFieldFromKey = nullFieldKeySet.begin()->firstElement(); - ASSERT_EQUALS( ExpressionKeysPrivate::makeSingleHashKey( nullObj.firstElement(), 0, 0 ), + ASSERT_EQUALS( HashKeyGenerator::makeSingleHashKey( nullObj.firstElement(), 0, 0 ), nullFieldFromKey.Long() ); BSONObj missingField = IndexLegacy::getMissingField(NULL,spec); @@ -1010,10 +1011,11 @@ namespace NamespaceTests { BSONObj nullObj = BSON( "a" << BSONNULL ); BSONObjSet nullFieldKeySet; - ExpressionKeysPrivate::getHashKeys(nullObj, "a", 0x5eed, 0, false, &nullFieldKeySet); + HashKeyGenerator gen( "a", 0x5eed, 0, false ); + gen.getKeys(nullObj, &nullFieldKeySet); BSONElement nullFieldFromKey = nullFieldKeySet.begin()->firstElement(); - ASSERT_EQUALS( ExpressionKeysPrivate::makeSingleHashKey( nullObj.firstElement(), 0x5eed, 0 ), + ASSERT_EQUALS( HashKeyGenerator::makeSingleHashKey( nullObj.firstElement(), 0x5eed, 0 ), nullFieldFromKey.Long() ); // Ensure that getMissingField recognizes that the seed is different (and returns |