diff options
author | Hari Khalsa <hkhalsa@10gen.com> | 2013-12-11 16:55:46 -0500 |
---|---|---|
committer | Hari Khalsa <hkhalsa@10gen.com> | 2013-12-12 09:54:02 -0500 |
commit | 143e99cf0c36aaffbdbc74d4a8a2a7f50f119139 (patch) | |
tree | b25924ac86ae6ad960942de1c797a18f4accd859 /src/mongo/db/fts | |
parent | 172bf6a4e24a65da9a6269507c2385c797d35cb7 (diff) | |
download | mongo-143e99cf0c36aaffbdbc74d4a8a2a7f50f119139.tar.gz |
SERVER-10026 migrate text cmd to use query exec
Diffstat (limited to 'src/mongo/db/fts')
-rw-r--r-- | src/mongo/db/fts/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_command.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_command_mongod.cpp | 152 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_search.cpp | 193 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_search.h | 114 |
5 files changed, 69 insertions, 392 deletions
diff --git a/src/mongo/db/fts/SConscript b/src/mongo/db/fts/SConscript index b0b13a443c1..7ec267cc6ad 100644 --- a/src/mongo/db/fts/SConscript +++ b/src/mongo/db/fts/SConscript @@ -48,7 +48,6 @@ env.Library( 'server_common', [ env.Library('ftsmongod', [ 'fts_command_mongod.cpp', - 'fts_search.cpp', ], LIBDEPS=["base","server_common"]) diff --git a/src/mongo/db/fts/fts_command.cpp b/src/mongo/db/fts/fts_command.cpp index f7a105b772b..e861734a1da 100644 --- a/src/mongo/db/fts/fts_command.cpp +++ b/src/mongo/db/fts/fts_command.cpp @@ -33,7 +33,6 @@ #include "mongo/db/fts/fts_command.h" #include "mongo/db/fts/fts_enabled.h" -#include "mongo/db/fts/fts_search.h" #include "mongo/db/fts/fts_util.h" #include "mongo/util/mongoutils/str.h" #include "mongo/util/timer.h" diff --git a/src/mongo/db/fts/fts_command_mongod.cpp b/src/mongo/db/fts/fts_command_mongod.cpp index 74022b7b9d4..e4ec4b56d95 100644 --- a/src/mongo/db/fts/fts_command_mongod.cpp +++ b/src/mongo/db/fts/fts_command_mongod.cpp @@ -28,17 +28,13 @@ * it in the license file. */ -#include <algorithm> #include <string> -#include <vector> #include "mongo/db/fts/fts_command.h" -#include "mongo/db/fts/fts_search.h" #include "mongo/db/fts/fts_util.h" -#include "mongo/db/index/catalog_hack.h" -#include "mongo/db/index/fts_access_method.h" #include "mongo/db/pdfile.h" -#include "mongo/db/projection.h" +#include "mongo/db/query/get_runner.h" +#include "mongo/db/query/type_explain.h" #include "mongo/util/mongoutils/str.h" #include "mongo/util/timer.h" @@ -74,106 +70,96 @@ namespace mongo { Timer comm; - scoped_ptr<Projection> pr; - if ( !projection.isEmpty() ) { - pr.reset( new Projection() ); - pr->init( projection ); - } - - // priority queue for results - Results results; - - Database* db = cc().database(); - Collection* collection = db->getCollection( ns ); + // Rewrite the cmd as a normal query. + BSONObjBuilder queryBob; + queryBob.appendElements(filter); - if ( !collection ) { - errmsg = "can't find ns"; - return false; + BSONObjBuilder textBob; + textBob.append("$search", searchString); + if (!language.empty()) { + textBob.append("$language", language); } + queryBob.append("$text", textBob.obj()); - vector<int> idxMatches; - collection->details()->findIndexByType( INDEX_NAME, idxMatches ); - if ( idxMatches.size() == 0 ) { - errmsg = str::stream() << "no text index for: " << ns; - return false; - } - if ( idxMatches.size() > 1 ) { - errmsg = str::stream() << "too many text indexes for: " << ns; - return false; - } + // This is the query we exec. + BSONObj queryObj = queryBob.obj(); - BSONObj indexPrefix; + // We sort by the score. + BSONObj sortSpec = BSON("$s" << BSON("$meta" << "text")); - IndexDescriptor* descriptor = collection->getIndexCatalog()->getDescriptor(idxMatches[0]); - auto_ptr<FTSAccessMethod> fam(new FTSAccessMethod(descriptor)); - if ( language == "" ) { - language = fam->getSpec().defaultLanguage().str(); - } - Status s = fam->getSpec().getIndexPrefix( filter, &indexPrefix ); - if ( !s.isOK() ) { - errmsg = s.toString(); + // We also project the score into the document and strip it out later during the reformatting + // of the results. + BSONObjBuilder projBob; + projBob.appendElements(projection); + projBob.appendElements(sortSpec); + BSONObj projObj = projBob.obj(); + + CanonicalQuery* cq; + if (!CanonicalQuery::canonicalize(ns, queryObj, sortSpec, projObj, 0, limit, BSONObj(), &cq).isOK()) { + errmsg = "Can't parse filter / create query"; return false; } - - FTSQuery query; - if ( !query.parse( searchString, language ).isOK() ) { - errmsg = "can't parse search"; + Runner* rawRunner; + if (!getRunner(cq, &rawRunner, 0).isOK()) { + errmsg = "can't get query runner"; return false; } - result.append( "queryDebugString", query.debugString() ); - result.append( "language", language ); - FTSSearch search(descriptor, fam->getSpec(), indexPrefix, query, filter ); - search.go( &results, limit ); + auto_ptr<Runner> runner(rawRunner); - // grab underlying container inside priority queue - vector<ScoredLocation> r( results.dangerous() ); + BSONArrayBuilder resultBuilder(result.subarrayStart("results")); - // sort results by score (not always in correct order, especially w.r.t. multiterm) - sort( r.begin(), r.end() ); + // Quoth: "leave a mb for other things" + int resultSize = 1024 * 1024; - // build the results bson array shown to user - BSONArrayBuilder a( result.subarrayStart( "results" ) ); + int numReturned = 0; - int tempSize = 1024 * 1024; // leave a mb for other things - long long numReturned = 0; - - for ( unsigned n = 0; n < r.size(); n++ ) { - BSONObj obj = BSONObj::make(r[n].rec); - BSONObj toSendBack = obj; - - if ( pr ) { - toSendBack = pr->transform(obj); - } - - if ( ( tempSize + toSendBack.objsize() ) >= BSONObjMaxUserSize ) { + BSONObj obj; + while (Runner::RUNNER_ADVANCED == runner->getNext(&obj, NULL)) { + if ((resultSize + obj.objsize()) >= BSONObjMaxUserSize) { break; } - - BSONObjBuilder x( a.subobjStart() ); - x.append( "score" , r[n].score ); - x.append( "obj", toSendBack ); - - BSONObj xobj = x.done(); - tempSize += xobj.objsize(); - + // We return an array of results. Add another element. + BSONObjBuilder oneResultBuilder(resultBuilder.subobjStart()); + oneResultBuilder.append("score", obj["$s"].number()); + + // Strip out the score from the returned obj. + BSONObjIterator resIt(obj); + BSONObjBuilder resBob; + while (resIt.more()) { + BSONElement elt = resIt.next(); + if (!mongoutils::str::equals("$s", elt.fieldName())) { + resBob.append(elt); + } + } + oneResultBuilder.append("obj", resBob.obj()); + BSONObj addedArrayObj = oneResultBuilder.done(); + resultSize += addedArrayObj.objsize(); numReturned++; } - a.done(); + resultBuilder.done(); // returns some stats to the user - BSONObjBuilder bb( result.subobjStart( "stats" ) ); - bb.appendNumber( "nscanned" , search.getKeysLookedAt() ); - bb.appendNumber( "nscannedObjects" , search.getObjLookedAt() ); - bb.appendNumber( "n" , numReturned ); - bb.appendNumber( "nfound" , r.size() ); - bb.append( "timeMicros", (int)comm.micros() ); - bb.done(); + BSONObjBuilder stats(result.subobjStart("stats")); + + // Fill in nscanned from the explain. + TypeExplain* bareExplain; + Status res = runner->getExplainPlan(&bareExplain); + if (res.isOK()) { + auto_ptr<TypeExplain> explain(bareExplain); + stats.append("nscanned", explain->getNScanned()); + stats.append("nscannedObjects", explain->getNScannedObjects()); + } + + stats.appendNumber( "n" , numReturned ); + stats.append( "timeMicros", (int)comm.micros() ); + stats.done(); return true; } - } -} + } // namespace fts + +} // namespace mongo diff --git a/src/mongo/db/fts/fts_search.cpp b/src/mongo/db/fts/fts_search.cpp deleted file mode 100644 index 89c1bf2c4cc..00000000000 --- a/src/mongo/db/fts/fts_search.cpp +++ /dev/null @@ -1,193 +0,0 @@ -// fts_search.cpp - -/** -* Copyright (C) 2012 10gen Inc. -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see <http://www.gnu.org/licenses/>. -* -* As a special exception, the copyright holders give permission to link the -* code of portions of this program with the OpenSSL library under certain -* conditions as described in each individual source file and distribute -* linked combinations including the program with the OpenSSL library. You -* must comply with the GNU Affero General Public License in all respects for -* all of the code used other than as permitted herein. If you modify file(s) -* with this exception, you may extend this exception to your version of the -* file(s), but you are not obligated to do so. If you do not wish to do so, -* delete this exception statement from your version. If you delete this -* exception statement from all source files in the program, then also delete -* it in the license file. -*/ - -#include "mongo/pch.h" - -#include "mongo/db/btreecursor.h" -#include "mongo/db/fts/fts_index_format.h" -#include "mongo/db/fts/fts_search.h" -#include "mongo/db/kill_current_op.h" -#include "mongo/db/pdfile.h" - -namespace mongo { - - namespace fts { - - /* - * Constructor generates query and term dictionaries - * @param ns, namespace - * @param idxNum, index number - * @param search, query string - * @param language, language of the query - * @param filter, filter object - */ - FTSSearch::FTSSearch( IndexDescriptor* descriptor, - const FTSSpec& ftsSpec, - const BSONObj& indexPrefix, - const FTSQuery& query, - const BSONObj& filter ) - : _descriptor(descriptor), - _ftsSpec(ftsSpec), - _indexPrefix( indexPrefix ), - _query( query ), - _ftsMatcher(query, ftsSpec) { - - if ( !filter.isEmpty() ) - _matcher.reset( new CoveredIndexMatcher( filter, _descriptor->keyPattern() ) ); - - _keysLookedAt = 0; - _objectsLookedAt = 0; - } - - bool FTSSearch::_ok( Record* record ) const { - if ( !_query.hasNonTermPieces() ) - return true; - return _ftsMatcher.matchesNonTerm( BSONObj::make( record ) ); - } - - /* - * GO: sets the tree cursors on each term in terms, processes the terms by advancing - * the terms cursors and storing the partial - * results and lastly calculates the top results - * @param results, the priority queue containing the top results - * @param limit, number of results in the priority queue - */ - void FTSSearch::go(Results* results, unsigned limit ) { - vector< shared_ptr<BtreeCursor> > cursors; - - for ( unsigned i = 0; i < _query.getTerms().size(); i++ ) { - const string& term = _query.getTerms()[i]; - BSONObj min = FTSIndexFormat::getIndexKey( MAX_WEIGHT, term, _indexPrefix ); - BSONObj max = FTSIndexFormat::getIndexKey( 0, term, _indexPrefix ); - - shared_ptr<BtreeCursor> c( BtreeCursor::make( - nsdetails(_descriptor->parentNS().c_str()), - _descriptor->getOnDisk(), - min, max, true, -1 ) ); - - cursors.push_back( c ); - } - - while ( !inShutdown() ) { - bool gotAny = false; - for ( unsigned i = 0; i < cursors.size(); i++ ) { - if ( cursors[i]->eof() ) - continue; - gotAny = true; - _process( cursors[i].get() ); - cursors[i]->advance(); - } - - if ( !gotAny ) - break; - - RARELY killCurrentOp.checkForInterrupt(); - } - - - // priority queue using a compare that grabs the lowest of two ScoredLocations by score. - for ( Scores::iterator i = _scores.begin(); i != _scores.end(); ++i ) { - - if ( i->second < 0 ) - continue; - - // priority queue - if ( results->size() < limit ) { // case a: queue unfilled - - if ( !_ok( i->first ) ) - continue; - - results->push( ScoredLocation( i->first, i->second ) ); - - } - else if ( i->second > results->top().score ) { // case b: queue filled - - if ( !_ok( i->first ) ) - continue; - - results->pop(); - results->push( ScoredLocation( i->first, i->second ) ); - } - else { - // else do nothing (case c) - } - - } - - } - - /* - * Takes a cursor and updates the partial score for said cursor in _scores map - * @param cursor, btree cursor pointing to the current document to be scored - */ - void FTSSearch::_process( BtreeCursor* cursor ) { - _keysLookedAt++; - - BSONObj key = cursor->currKey(); - - BSONObjIterator i( key ); - for ( unsigned j = 0; j < _ftsSpec.numExtraBefore(); j++) - i.next(); - i.next(); // move past indexToken - BSONElement scoreElement = i.next(); - - double score = scoreElement.number(); - - double& cur = _scores[(cursor->currLoc()).rec()]; - - if ( cur < 0 ) { - // already been rejected - return; - } - - if ( cur == 0 && _matcher.get() ) { - // we haven't seen this before and we have a matcher - MatchDetails d; - if ( !_matcher->matchesCurrent( cursor, &d ) ) { - cur = -1; - } - - if ( d.hasLoadedRecord() ) - _objectsLookedAt++; - - if ( cur == -1 ) - return; - } - - if ( cur ) - cur += score * (1 + 1 / score); - else - cur += score; - - } - - } - -} diff --git a/src/mongo/db/fts/fts_search.h b/src/mongo/db/fts/fts_search.h deleted file mode 100644 index 17070cec903..00000000000 --- a/src/mongo/db/fts/fts_search.h +++ /dev/null @@ -1,114 +0,0 @@ -// fts_search.h - -/** -* Copyright (C) 2012 10gen Inc. -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see <http://www.gnu.org/licenses/>. -* -* As a special exception, the copyright holders give permission to link the -* code of portions of this program with the OpenSSL library under certain -* conditions as described in each individual source file and distribute -* linked combinations including the program with the OpenSSL library. You -* must comply with the GNU Affero General Public License in all respects for -* all of the code used other than as permitted herein. If you modify file(s) -* with this exception, you may extend this exception to your version of the -* file(s), but you are not obligated to do so. If you do not wish to do so, -* delete this exception statement from your version. If you delete this -* exception statement from all source files in the program, then also delete -* it in the license file. -*/ - -#pragma once - -#include <map> -#include <set> -#include <vector> -#include <queue> - -#include "mongo/base/disallow_copying.h" -#include "mongo/db/fts/fts_matcher.h" -#include "mongo/db/fts/fts_query.h" -#include "mongo/db/fts/fts_util.h" -#include "mongo/db/index/index_descriptor.h" -#include "mongo/db/matcher.h" - -// mongo::fts::FTSSearch is deprecated: the "text" command is deprecated in favor of the $text -// query operator. - -namespace mongo { - - class BtreeCursor; - - namespace fts { - - // priority queue template, for use when we're populating results - // vector returned to the user. extends the default priority_queue - // by providing direct access to the underlying vector, which should - // be used CAREFULLY because you can get into trouble.. - template <class T, class S, class C> - class a_priority_queue : public std::priority_queue<T, S, C> { - public: - // return the value of an element at position n when we call pq[n] - T operator[](const int &n) { return this->c[n]; } - // return underlying data structure. called dangerous because it is. - S dangerous() { return this->c; } - }; - - typedef a_priority_queue<ScoredLocation, vector<ScoredLocation>, ScoredLocationComp> Results; - - class FTSSearch { - MONGO_DISALLOW_COPYING(FTSSearch); - public: - - typedef std::map<Record*,double> Scores; - - FTSSearch( IndexDescriptor* descriptor, - const FTSSpec& ftsSpec, - const BSONObj& indexPrefix, - const FTSQuery& query, - const BSONObj& filter ); - - void go(Results* results, unsigned limit ); - - long long getKeysLookedAt() const { return _keysLookedAt; } - long long getObjLookedAt() const { return _objectsLookedAt; } - - private: - - void _process( BtreeCursor* cursor ); - - /** - * checks not index pieces - * i.e. prhases & negated terms - */ - bool _ok( Record* record ) const; - - IndexDescriptor* _descriptor; - const FTSSpec& _ftsSpec; - BSONObj _indexPrefix; - FTSQuery _query; - FTSMatcher _ftsMatcher; - - scoped_ptr<CoveredIndexMatcher> _matcher; - - long long _keysLookedAt; - long long _objectsLookedAt; - - Scores _scores; - - }; - - } // namespace fts - -} // namespace mongo - |