diff options
Diffstat (limited to 'src/mongo/db/index.cpp')
-rw-r--r-- | src/mongo/db/index.cpp | 446 |
1 files changed, 446 insertions, 0 deletions
diff --git a/src/mongo/db/index.cpp b/src/mongo/db/index.cpp new file mode 100644 index 00000000000..5eaeab551df --- /dev/null +++ b/src/mongo/db/index.cpp @@ -0,0 +1,446 @@ +/** @file index.cpp */ + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "pch.h" +#include "namespace-inl.h" +#include "index.h" +#include "btree.h" +#include "background.h" +#include "repl/rs.h" +#include "ops/delete.h" + + +namespace mongo { + + template< class V > + class IndexInterfaceImpl : public IndexInterface { + public: + typedef typename V::KeyOwned KeyOwned; + typedef Continuation<V> Cont; + virtual int keyCompare(const BSONObj& l,const BSONObj& r, const Ordering &ordering); + + Cont *c[NamespaceDetails::NIndexesMax]; + int n; + + public: + IndexInterfaceImpl() { n = 0; } + + /* lacking CONCURRENCY WRITE this supports only one writer */ + void _phasedBegin() { + // we do this here as phasedFinish can throw exceptions (we could catch there, but just as easy to do here) + for( int i = 0; i < n; i++ ) { + delete c[i]; + c[i] = 0; // defensive + } + n = 0; + } + void phasedQueueItemToInsert( + int idxNo, + DiskLoc thisLoc, DiskLoc _recordLoc, const BSONObj &_key, + const Ordering& _order, IndexDetails& _idx, bool dupsAllowed) + { + if( idxNo >= n ) + n = idxNo + 1; + Cont *C = c[idxNo] = new Cont(thisLoc, _recordLoc, _key, _order, _idx); + thisLoc.btree<V>()->twoStepInsert(thisLoc, *C, dupsAllowed); + } + void _phasedFinish() { + for( int i = 0; i < n; i++ ) { + // if mixing v0 and v1 indexes, in that case (only) there could be nulls in the list + if( c[i] ) { + c[i]->stepTwo(); + } + } + } + +/* virtual DiskLoc locate(const IndexDetails &idx , const DiskLoc& thisLoc, const BSONObj& key, const Ordering &order, + int& pos, bool& found, const DiskLoc &recordLoc, int direction) { + return thisLoc.btree<V>()->locate(idx, thisLoc, key, order, pos, found, recordLoc, direction); + } + */ + virtual long long fullValidate(const DiskLoc& thisLoc, const BSONObj &order) { + return thisLoc.btree<V>()->fullValidate(thisLoc, order); + } + virtual DiskLoc findSingle(const IndexDetails &indexdetails , const DiskLoc& thisLoc, const BSONObj& key) const { + return thisLoc.btree<V>()->findSingle(indexdetails,thisLoc,key); + } + virtual bool unindex(const DiskLoc thisLoc, IndexDetails& id, const BSONObj& key, const DiskLoc recordLoc) const { + return thisLoc.btree<V>()->unindex(thisLoc, id, key, recordLoc); + } + virtual int bt_insert(const DiskLoc thisLoc, const DiskLoc recordLoc, + const BSONObj& key, const Ordering &order, bool dupsAllowed, + IndexDetails& idx, bool toplevel = true) const { + return thisLoc.btree<V>()->bt_insert(thisLoc, recordLoc, key, order, dupsAllowed, idx, toplevel); + } + virtual DiskLoc addBucket(const IndexDetails& id) { + return BtreeBucket<V>::addBucket(id); + } + virtual void uassertIfDups(IndexDetails& idx, vector<BSONObj*>& addedKeys, DiskLoc head, DiskLoc self, const Ordering& ordering) { + const BtreeBucket<V> *h = head.btree<V>(); + for( vector<BSONObj*>::iterator i = addedKeys.begin(); i != addedKeys.end(); i++ ) { + KeyOwned k(**i); + bool dup = h->wouldCreateDup(idx, head, k, ordering, self); + uassert( 11001 , h->dupKeyError( idx , k ) , !dup); + } + } + + // for geo: + virtual bool isUsed(DiskLoc thisLoc, int pos) { return thisLoc.btree<V>()->isUsed(pos); } + virtual void keyAt(DiskLoc thisLoc, int pos, BSONObj& key, DiskLoc& recordLoc) { + recordLoc = DiskLoc(); + const BtreeBucket<V>* bucket = thisLoc.btree<V>(); + int n = bucket->nKeys(); + + if( pos < 0 || pos >= n || n == 0xffff /* bucket deleted */ || ! bucket->isUsed( pos ) ){ + // log() << "Pos: " << pos << " n " << n << endl; + return; + } + + typename BtreeBucket<V>::KeyNode kn = bucket->keyNode(pos); + key = kn.key.toBson(); + recordLoc = kn.recordLoc; + } + virtual BSONObj keyAt(DiskLoc thisLoc, int pos) { + return thisLoc.btree<V>()->keyAt(pos).toBson(); + } + virtual DiskLoc locate(const IndexDetails &idx , const DiskLoc& thisLoc, const BSONObj& key, const Ordering &order, + int& pos, bool& found, const DiskLoc &recordLoc, int direction=1) { + return thisLoc.btree<V>()->locate(idx, thisLoc, key, order, pos, found, recordLoc, direction); + } + virtual DiskLoc advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller) { + return thisLoc.btree<V>()->advance(thisLoc,keyOfs,direction,caller); + } + }; + + int oldCompare(const BSONObj& l,const BSONObj& r, const Ordering &o); // key.cpp + + template <> + int IndexInterfaceImpl< V0 >::keyCompare(const BSONObj& l, const BSONObj& r, const Ordering &ordering) { + return oldCompare(l, r, ordering); + } + + template <> + int IndexInterfaceImpl< V1 >::keyCompare(const BSONObj& l, const BSONObj& r, const Ordering &ordering) { + return l.woCompare(r, ordering, /*considerfieldname*/false); + } + + IndexInterfaceImpl<V0> iii_v0; + IndexInterfaceImpl<V1> iii_v1; + + IndexInterface *IndexDetails::iis[] = { &iii_v0, &iii_v1 }; + + void IndexInterface::phasedBegin() { + iii_v0._phasedBegin(); + iii_v1._phasedBegin(); + } + void IndexInterface::phasedFinish() { + iii_v0._phasedFinish(); + iii_v1._phasedFinish(); + } + + int removeFromSysIndexes(const char *ns, const char *idxName) { + string system_indexes = cc().database()->name + ".system.indexes"; + BSONObjBuilder b; + b.append("ns", ns); + b.append("name", idxName); // e.g.: { name: "ts_1", ns: "foo.coll" } + BSONObj cond = b.done(); + return (int) deleteObjects(system_indexes.c_str(), cond, false, false, true); + } + + /* this is just an attempt to clean up old orphaned stuff on a delete all indexes + call. repair database is the clean solution, but this gives one a lighter weight + partial option. see dropIndexes() + */ + void assureSysIndexesEmptied(const char *ns, IndexDetails *idIndex) { + string system_indexes = cc().database()->name + ".system.indexes"; + BSONObjBuilder b; + b.append("ns", ns); + if( idIndex ) { + b.append("name", BSON( "$ne" << idIndex->indexName().c_str() )); + } + BSONObj cond = b.done(); + int n = (int) deleteObjects(system_indexes.c_str(), cond, false, false, true); + if( n ) { + log() << "info: assureSysIndexesEmptied cleaned up " << n << " entries" << endl; + } + } + + int IndexDetails::keyPatternOffset( const string& key ) const { + BSONObjIterator i( keyPattern() ); + int n = 0; + while ( i.more() ) { + BSONElement e = i.next(); + if ( key == e.fieldName() ) + return n; + n++; + } + return -1; + } + + const IndexSpec& IndexDetails::getSpec() const { + SimpleMutex::scoped_lock lk(NamespaceDetailsTransient::_qcMutex); + return NamespaceDetailsTransient::get_inlock( info.obj()["ns"].valuestr() ).getIndexSpec( this ); + } + + /* delete this index. does NOT clean up the system catalog + (system.indexes or system.namespaces) -- only NamespaceIndex. + */ + void IndexDetails::kill_idx() { + string ns = indexNamespace(); // e.g. foo.coll.$ts_1 + try { + + string pns = parentNS(); // note we need a copy, as parentNS() won't work after the drop() below + + // clean up parent namespace index cache + NamespaceDetailsTransient::get( pns.c_str() ).deletedIndex(); + + string name = indexName(); + + /* important to catch exception here so we can finish cleanup below. */ + try { + dropNS(ns.c_str()); + } + catch(DBException& ) { + log(2) << "IndexDetails::kill(): couldn't drop ns " << ns << endl; + } + head.setInvalid(); + info.setInvalid(); + + // clean up in system.indexes. we do this last on purpose. + int n = removeFromSysIndexes(pns.c_str(), name.c_str()); + wassert( n == 1 ); + + } + catch ( DBException &e ) { + log() << "exception in kill_idx: " << e << ", ns: " << ns << endl; + } + } + + void IndexDetails::getKeysFromObject( const BSONObj& obj, BSONObjSet& keys) const { + getSpec().getKeys( obj, keys ); + } + + void setDifference(BSONObjSet &l, BSONObjSet &r, vector<BSONObj*> &diff) { + // l and r must use the same ordering spec. + verify( 14819, l.key_comp().order() == r.key_comp().order() ); + BSONObjSet::iterator i = l.begin(); + BSONObjSet::iterator j = r.begin(); + while ( 1 ) { + if ( i == l.end() ) + break; + while ( j != r.end() && j->woCompare( *i ) < 0 ) + j++; + if ( j == r.end() || i->woCompare(*j) != 0 ) { + const BSONObj *jo = &*i; + diff.push_back( (BSONObj *) jo ); + } + i++; + } + } + + void getIndexChanges(vector<IndexChanges>& v, NamespaceDetails& d, BSONObj newObj, BSONObj oldObj, bool &changedId) { + int z = d.nIndexesBeingBuilt(); + v.resize(z); + for( int i = 0; i < z; i++ ) { + IndexDetails& idx = d.idx(i); + BSONObj idxKey = idx.info.obj().getObjectField("key"); // eg { ts : 1 } + IndexChanges& ch = v[i]; + idx.getKeysFromObject(oldObj, ch.oldkeys); + idx.getKeysFromObject(newObj, ch.newkeys); + if( ch.newkeys.size() > 1 ) + d.setIndexIsMultikey(i); + setDifference(ch.oldkeys, ch.newkeys, ch.removed); + setDifference(ch.newkeys, ch.oldkeys, ch.added); + if ( ch.removed.size() > 0 && ch.added.size() > 0 && idx.isIdIndex() ) { + changedId = true; + } + } + } + + void dupCheck(vector<IndexChanges>& v, NamespaceDetails& d, DiskLoc curObjLoc) { + int z = d.nIndexesBeingBuilt(); + for( int i = 0; i < z; i++ ) { + IndexDetails& idx = d.idx(i); + v[i].dupCheck(idx, curObjLoc); + } + } + + // should be { <something> : <simpletype[1|-1]>, .keyp.. } + static bool validKeyPattern(BSONObj kp) { + BSONObjIterator i(kp); + while( i.moreWithEOO() ) { + BSONElement e = i.next(); + if( e.type() == Object || e.type() == Array ) + return false; + } + return true; + } + + /* Prepare to build an index. Does not actually build it (except for a special _id case). + - We validate that the params are good + - That the index does not already exist + - Creates the source collection if it DNE + + example of 'io': + { ns : 'test.foo', name : 'z', key : { z : 1 } } + + throws DBException + + @param sourceNS - source NS we are indexing + @param sourceCollection - its details ptr + @return true if ok to continue. when false we stop/fail silently (index already exists) + */ + bool prepareToBuildIndex(const BSONObj& io, bool god, string& sourceNS, NamespaceDetails *&sourceCollection, BSONObj& fixedIndexObject ) { + sourceCollection = 0; + + // logical name of the index. todo: get rid of the name, we don't need it! + const char *name = io.getStringField("name"); + uassert(12523, "no index name specified", *name); + + // the collection for which we are building an index + sourceNS = io.getStringField("ns"); + uassert(10096, "invalid ns to index", sourceNS.find( '.' ) != string::npos); + uassert(10097, "bad table to index name on add index attempt", + cc().database()->name == nsToDatabase(sourceNS.c_str())); + + BSONObj key = io.getObjectField("key"); + uassert(12524, "index key pattern too large", key.objsize() <= 2048); + if( !validKeyPattern(key) ) { + string s = string("bad index key pattern ") + key.toString(); + uasserted(10098 , s.c_str()); + } + + if ( sourceNS.empty() || key.isEmpty() ) { + log(2) << "bad add index attempt name:" << (name?name:"") << "\n ns:" << + sourceNS << "\n idxobj:" << io.toString() << endl; + string s = "bad add index attempt " + sourceNS + " key:" + key.toString(); + uasserted(12504, s); + } + + sourceCollection = nsdetails(sourceNS.c_str()); + if( sourceCollection == 0 ) { + // try to create it + string err; + if ( !userCreateNS(sourceNS.c_str(), BSONObj(), err, false) ) { + problem() << "ERROR: failed to create collection while adding its index. " << sourceNS << endl; + return false; + } + sourceCollection = nsdetails(sourceNS.c_str()); + tlog() << "info: creating collection " << sourceNS << " on add index" << endl; + assert( sourceCollection ); + } + + if ( sourceCollection->findIndexByName(name) >= 0 ) { + // index already exists. + return false; + } + if( sourceCollection->findIndexByKeyPattern(key) >= 0 ) { + log(2) << "index already exists with diff name " << name << ' ' << key.toString() << endl; + return false; + } + + if ( sourceCollection->nIndexes >= NamespaceDetails::NIndexesMax ) { + stringstream ss; + ss << "add index fails, too many indexes for " << sourceNS << " key:" << key.toString(); + string s = ss.str(); + log() << s << '\n'; + uasserted(12505,s); + } + + /* we can't build a new index for the ns if a build is already in progress in the background - + EVEN IF this is a foreground build. + */ + uassert(12588, "cannot add index with a background operation in progress", + !BackgroundOperation::inProgForNs(sourceNS.c_str())); + + /* this is because we want key patterns like { _id : 1 } and { _id : <someobjid> } to + all be treated as the same pattern. + */ + if ( IndexDetails::isIdIndexPattern(key) ) { + if( !god ) { + ensureHaveIdIndex( sourceNS.c_str() ); + return false; + } + } + else { + /* is buildIndexes:false set for this replica set member? + if so we don't build any indexes except _id + */ + if( theReplSet && !theReplSet->buildIndexes() ) + return false; + } + + string pluginName = IndexPlugin::findPluginName( key ); + IndexPlugin * plugin = pluginName.size() ? IndexPlugin::get( pluginName ) : 0; + + + { + BSONObj o = io; + if ( plugin ) { + o = plugin->adjustIndexSpec(o); + } + BSONObjBuilder b; + int v = DefaultIndexVersionNumber; + if( !o["v"].eoo() ) { + double vv = o["v"].Number(); + // note (one day) we may be able to fresh build less versions than we can use + // isASupportedIndexVersionNumber() is what we can use + uassert(14803, str::stream() << "this version of mongod cannot build new indexes of version number " << vv, + vv == 0 || vv == 1); + v = (int) vv; + } + // idea is to put things we use a lot earlier + b.append("v", v); + b.append(o["key"]); + if( o["unique"].trueValue() ) + b.appendBool("unique", true); // normalize to bool true in case was int 1 or something... + b.append(o["ns"]); + + { + // stripping _id + BSONObjIterator i(o); + while ( i.more() ) { + BSONElement e = i.next(); + string s = e.fieldName(); + if( s != "_id" && s != "v" && s != "ns" && s != "unique" && s != "key" ) + b.append(e); + } + } + + fixedIndexObject = b.obj(); + } + + return true; + } + + void IndexSpec::reset( const IndexDetails * details ) { + _details = details; + reset( details->info ); + } + + void IndexSpec::reset( const BSONObj& _info ) { + info = _info; + keyPattern = info["key"].embeddedObjectUserCheck(); + if ( keyPattern.objsize() == 0 ) { + out() << info.toString() << endl; + assert(false); + } + _init(); + } + +} |