summary refs log tree commit diff
path: root/src/mongo/db/fts
diff options
context:
space:
mode:
author Hari Khalsa <hkhalsa@10gen.com> 2013-12-11 16:55:46 -0500
committer Hari Khalsa <hkhalsa@10gen.com> 2013-12-12 09:54:02 -0500
commit 143e99cf0c36aaffbdbc74d4a8a2a7f50f119139 (patch)
tree b25924ac86ae6ad960942de1c797a18f4accd859 /src/mongo/db/fts
parent 172bf6a4e24a65da9a6269507c2385c797d35cb7 (diff)
download mongo-143e99cf0c36aaffbdbc74d4a8a2a7f50f119139.tar.gz
SERVER-10026 migrate text cmd to use query exec
Diffstat (limited to 'src/mongo/db/fts')
-rw-r--r-- src/mongo/db/fts/SConscript 1
-rw-r--r-- src/mongo/db/fts/fts_command.cpp 1
-rw-r--r-- src/mongo/db/fts/fts_command_mongod.cpp 152
-rw-r--r-- src/mongo/db/fts/fts_search.cpp 193
-rw-r--r-- src/mongo/db/fts/fts_search.h 114
5 files changed, 69 insertions, 392 deletions
diff --git a/src/mongo/db/fts/SConscript b/src/mongo/db/fts/SConscript
index b0b13a443c1..7ec267cc6ad 100644
--- a/src/mongo/db/fts/SConscript
+++ b/src/mongo/db/fts/SConscript
@@ -48,7 +48,6 @@ env.Library( 'server_common', [
env.Library('ftsmongod', [
'fts_command_mongod.cpp',
- 'fts_search.cpp',
], LIBDEPS=["base","server_common"])
diff --git a/src/mongo/db/fts/fts_command.cpp b/src/mongo/db/fts/fts_command.cpp
index f7a105b772b..e861734a1da 100644
--- a/src/mongo/db/fts/fts_command.cpp
+++ b/src/mongo/db/fts/fts_command.cpp
@@ -33,7 +33,6 @@
#include "mongo/db/fts/fts_command.h"
#include "mongo/db/fts/fts_enabled.h"
-#include "mongo/db/fts/fts_search.h"
#include "mongo/db/fts/fts_util.h"
#include "mongo/util/mongoutils/str.h"
#include "mongo/util/timer.h"
diff --git a/src/mongo/db/fts/fts_command_mongod.cpp b/src/mongo/db/fts/fts_command_mongod.cpp
index 74022b7b9d4..e4ec4b56d95 100644
--- a/src/mongo/db/fts/fts_command_mongod.cpp
+++ b/src/mongo/db/fts/fts_command_mongod.cpp
@@ -28,17 +28,13 @@
* it in the license file.
*/
-#include <algorithm>
#include <string>
-#include <vector>
#include "mongo/db/fts/fts_command.h"
-#include "mongo/db/fts/fts_search.h"
#include "mongo/db/fts/fts_util.h"
-#include "mongo/db/index/catalog_hack.h"
-#include "mongo/db/index/fts_access_method.h"
#include "mongo/db/pdfile.h"
-#include "mongo/db/projection.h"
+#include "mongo/db/query/get_runner.h"
+#include "mongo/db/query/type_explain.h"
#include "mongo/util/mongoutils/str.h"
#include "mongo/util/timer.h"
@@ -74,106 +70,96 @@ namespace mongo {
Timer comm;
- scoped_ptr<Projection> pr;
- if ( !projection.isEmpty() ) {
- pr.reset( new Projection() );
- pr->init( projection );
- }
-
- // priority queue for results
- Results results;
-
- Database* db = cc().database();
- Collection* collection = db->getCollection( ns );
+ // Rewrite the cmd as a normal query.
+ BSONObjBuilder queryBob;
+ queryBob.appendElements(filter);
- if ( !collection ) {
- errmsg = "can't find ns";
- return false;
+ BSONObjBuilder textBob;
+ textBob.append("$search", searchString);
+ if (!language.empty()) {
+ textBob.append("$language", language);
}
+ queryBob.append("$text", textBob.obj());
- vector<int> idxMatches;
- collection->details()->findIndexByType( INDEX_NAME, idxMatches );
- if ( idxMatches.size() == 0 ) {
- errmsg = str::stream() << "no text index for: " << ns;
- return false;
- }
- if ( idxMatches.size() > 1 ) {
- errmsg = str::stream() << "too many text indexes for: " << ns;
- return false;
- }
+ // This is the query we exec.
+ BSONObj queryObj = queryBob.obj();
- BSONObj indexPrefix;
+ // We sort by the score.
+ BSONObj sortSpec = BSON("$s" << BSON("$meta" << "text"));
- IndexDescriptor* descriptor = collection->getIndexCatalog()->getDescriptor(idxMatches[0]);
- auto_ptr<FTSAccessMethod> fam(new FTSAccessMethod(descriptor));
- if ( language == "" ) {
- language = fam->getSpec().defaultLanguage().str();
- }
- Status s = fam->getSpec().getIndexPrefix( filter, &indexPrefix );
- if ( !s.isOK() ) {
- errmsg = s.toString();
+ // We also project the score into the document and strip it out later during the reformatting
+ // of the results.
+ BSONObjBuilder projBob;
+ projBob.appendElements(projection);
+ projBob.appendElements(sortSpec);
+ BSONObj projObj = projBob.obj();
+
+ CanonicalQuery* cq;
+ if (!CanonicalQuery::canonicalize(ns, queryObj, sortSpec, projObj, 0, limit, BSONObj(), &cq).isOK()) {
+ errmsg = "Can't parse filter / create query";
return false;
}
-
- FTSQuery query;
- if ( !query.parse( searchString, language ).isOK() ) {
- errmsg = "can't parse search";
+ Runner* rawRunner;
+ if (!getRunner(cq, &rawRunner, 0).isOK()) {
+ errmsg = "can't get query runner";
return false;
}
- result.append( "queryDebugString", query.debugString() );
- result.append( "language", language );
- FTSSearch search(descriptor, fam->getSpec(), indexPrefix, query, filter );
- search.go( &results, limit );
+ auto_ptr<Runner> runner(rawRunner);
- // grab underlying container inside priority queue
- vector<ScoredLocation> r( results.dangerous() );
+ BSONArrayBuilder resultBuilder(result.subarrayStart("results"));
- // sort results by score (not always in correct order, especially w.r.t. multiterm)
- sort( r.begin(), r.end() );
+ // Quoth: "leave a mb for other things"
+ int resultSize = 1024 * 1024;
- // build the results bson array shown to user
- BSONArrayBuilder a( result.subarrayStart( "results" ) );
+ int numReturned = 0;
- int tempSize = 1024 * 1024; // leave a mb for other things
- long long numReturned = 0;
-
- for ( unsigned n = 0; n < r.size(); n++ ) {
- BSONObj obj = BSONObj::make(r[n].rec);
- BSONObj toSendBack = obj;
-
- if ( pr ) {
- toSendBack = pr->transform(obj);
- }
-
- if ( ( tempSize + toSendBack.objsize() ) >= BSONObjMaxUserSize ) {
+ BSONObj obj;
+ while (Runner::RUNNER_ADVANCED == runner->getNext(&obj, NULL)) {
+ if ((resultSize + obj.objsize()) >= BSONObjMaxUserSize) {
break;
}
-
- BSONObjBuilder x( a.subobjStart() );
- x.append( "score" , r[n].score );
- x.append( "obj", toSendBack );
-
- BSONObj xobj = x.done();
- tempSize += xobj.objsize();
-
+ // We return an array of results. Add another element.
+ BSONObjBuilder oneResultBuilder(resultBuilder.subobjStart());
+ oneResultBuilder.append("score", obj["$s"].number());
+
+ // Strip out the score from the returned obj.
+ BSONObjIterator resIt(obj);
+ BSONObjBuilder resBob;
+ while (resIt.more()) {
+ BSONElement elt = resIt.next();
+ if (!mongoutils::str::equals("$s", elt.fieldName())) {
+ resBob.append(elt);
+ }
+ }
+ oneResultBuilder.append("obj", resBob.obj());
+ BSONObj addedArrayObj = oneResultBuilder.done();
+ resultSize += addedArrayObj.objsize();
numReturned++;
}
- a.done();
+ resultBuilder.done();
// returns some stats to the user
- BSONObjBuilder bb( result.subobjStart( "stats" ) );
- bb.appendNumber( "nscanned" , search.getKeysLookedAt() );
- bb.appendNumber( "nscannedObjects" , search.getObjLookedAt() );
- bb.appendNumber( "n" , numReturned );
- bb.appendNumber( "nfound" , r.size() );
- bb.append( "timeMicros", (int)comm.micros() );
- bb.done();
+ BSONObjBuilder stats(result.subobjStart("stats"));
+
+ // Fill in nscanned from the explain.
+ TypeExplain* bareExplain;
+ Status res = runner->getExplainPlan(&bareExplain);
+ if (res.isOK()) {
+ auto_ptr<TypeExplain> explain(bareExplain);
+ stats.append("nscanned", explain->getNScanned());
+ stats.append("nscannedObjects", explain->getNScannedObjects());
+ }
+
+ stats.appendNumber( "n" , numReturned );
+ stats.append( "timeMicros", (int)comm.micros() );
+ stats.done();
return true;
}
- }
-}
+ } // namespace fts
+
+} // namespace mongo
diff --git a/src/mongo/db/fts/fts_search.cpp b/src/mongo/db/fts/fts_search.cpp
deleted file mode 100644
index 89c1bf2c4cc..00000000000
--- a/src/mongo/db/fts/fts_search.cpp
+++ /dev/null
@@ -1,193 +0,0 @@
-// fts_search.cpp
-
-/**
-* Copyright (C) 2012 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#include "mongo/pch.h"
-
-#include "mongo/db/btreecursor.h"
-#include "mongo/db/fts/fts_index_format.h"
-#include "mongo/db/fts/fts_search.h"
-#include "mongo/db/kill_current_op.h"
-#include "mongo/db/pdfile.h"
-
-namespace mongo {
-
- namespace fts {
-
- /*
- * Constructor generates query and term dictionaries
- * @param ns, namespace
- * @param idxNum, index number
- * @param search, query string
- * @param language, language of the query
- * @param filter, filter object
- */
- FTSSearch::FTSSearch( IndexDescriptor* descriptor,
- const FTSSpec& ftsSpec,
- const BSONObj& indexPrefix,
- const FTSQuery& query,
- const BSONObj& filter )
- : _descriptor(descriptor),
- _ftsSpec(ftsSpec),
- _indexPrefix( indexPrefix ),
- _query( query ),
- _ftsMatcher(query, ftsSpec) {
-
- if ( !filter.isEmpty() )
- _matcher.reset( new CoveredIndexMatcher( filter, _descriptor->keyPattern() ) );
-
- _keysLookedAt = 0;
- _objectsLookedAt = 0;
- }
-
- bool FTSSearch::_ok( Record* record ) const {
- if ( !_query.hasNonTermPieces() )
- return true;
- return _ftsMatcher.matchesNonTerm( BSONObj::make( record ) );
- }
-
- /*
- * GO: sets the tree cursors on each term in terms, processes the terms by advancing
- * the terms cursors and storing the partial
- * results and lastly calculates the top results
- * @param results, the priority queue containing the top results
- * @param limit, number of results in the priority queue
- */
- void FTSSearch::go(Results* results, unsigned limit ) {
- vector< shared_ptr<BtreeCursor> > cursors;
-
- for ( unsigned i = 0; i < _query.getTerms().size(); i++ ) {
- const string& term = _query.getTerms()[i];
- BSONObj min = FTSIndexFormat::getIndexKey( MAX_WEIGHT, term, _indexPrefix );
- BSONObj max = FTSIndexFormat::getIndexKey( 0, term, _indexPrefix );
-
- shared_ptr<BtreeCursor> c( BtreeCursor::make(
- nsdetails(_descriptor->parentNS().c_str()),
- _descriptor->getOnDisk(),
- min, max, true, -1 ) );
-
- cursors.push_back( c );
- }
-
- while ( !inShutdown() ) {
- bool gotAny = false;
- for ( unsigned i = 0; i < cursors.size(); i++ ) {
- if ( cursors[i]->eof() )
- continue;
- gotAny = true;
- _process( cursors[i].get() );
- cursors[i]->advance();
- }
-
- if ( !gotAny )
- break;
-
- RARELY killCurrentOp.checkForInterrupt();
- }
-
-
- // priority queue using a compare that grabs the lowest of two ScoredLocations by score.
- for ( Scores::iterator i = _scores.begin(); i != _scores.end(); ++i ) {
-
- if ( i->second < 0 )
- continue;
-
- // priority queue
- if ( results->size() < limit ) { // case a: queue unfilled
-
- if ( !_ok( i->first ) )
- continue;
-
- results->push( ScoredLocation( i->first, i->second ) );
-
- }
- else if ( i->second > results->top().score ) { // case b: queue filled
-
- if ( !_ok( i->first ) )
- continue;
-
- results->pop();
- results->push( ScoredLocation( i->first, i->second ) );
- }
- else {
- // else do nothing (case c)
- }
-
- }
-
- }
-
- /*
- * Takes a cursor and updates the partial score for said cursor in _scores map
- * @param cursor, btree cursor pointing to the current document to be scored
- */
- void FTSSearch::_process( BtreeCursor* cursor ) {
- _keysLookedAt++;
-
- BSONObj key = cursor->currKey();
-
- BSONObjIterator i( key );
- for ( unsigned j = 0; j < _ftsSpec.numExtraBefore(); j++)
- i.next();
- i.next(); // move past indexToken
- BSONElement scoreElement = i.next();
-
- double score = scoreElement.number();
-
- double& cur = _scores[(cursor->currLoc()).rec()];
-
- if ( cur < 0 ) {
- // already been rejected
- return;
- }
-
- if ( cur == 0 && _matcher.get() ) {
- // we haven't seen this before and we have a matcher
- MatchDetails d;
- if ( !_matcher->matchesCurrent( cursor, &d ) ) {
- cur = -1;
- }
-
- if ( d.hasLoadedRecord() )
- _objectsLookedAt++;
-
- if ( cur == -1 )
- return;
- }
-
- if ( cur )
- cur += score * (1 + 1 / score);
- else
- cur += score;
-
- }
-
- }
-
-}
diff --git a/src/mongo/db/fts/fts_search.h b/src/mongo/db/fts/fts_search.h
deleted file mode 100644
index 17070cec903..00000000000
--- a/src/mongo/db/fts/fts_search.h
+++ /dev/null
@@ -1,114 +0,0 @@
-// fts_search.h
-
-/**
-* Copyright (C) 2012 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*
-* As a special exception, the copyright holders give permission to link the
-* code of portions of this program with the OpenSSL library under certain
-* conditions as described in each individual source file and distribute
-* linked combinations including the program with the OpenSSL library. You
-* must comply with the GNU Affero General Public License in all respects for
-* all of the code used other than as permitted herein. If you modify file(s)
-* with this exception, you may extend this exception to your version of the
-* file(s), but you are not obligated to do so. If you do not wish to do so,
-* delete this exception statement from your version. If you delete this
-* exception statement from all source files in the program, then also delete
-* it in the license file.
-*/
-
-#pragma once
-
-#include <map>
-#include <set>
-#include <vector>
-#include <queue>
-
-#include "mongo/base/disallow_copying.h"
-#include "mongo/db/fts/fts_matcher.h"
-#include "mongo/db/fts/fts_query.h"
-#include "mongo/db/fts/fts_util.h"
-#include "mongo/db/index/index_descriptor.h"
-#include "mongo/db/matcher.h"
-
-// mongo::fts::FTSSearch is deprecated: the "text" command is deprecated in favor of the $text
-// query operator.
-
-namespace mongo {
-
- class BtreeCursor;
-
- namespace fts {
-
- // priority queue template, for use when we're populating results
- // vector returned to the user. extends the default priority_queue
- // by providing direct access to the underlying vector, which should
- // be used CAREFULLY because you can get into trouble..
- template <class T, class S, class C>
- class a_priority_queue : public std::priority_queue<T, S, C> {
- public:
- // return the value of an element at position n when we call pq[n]
- T operator[](const int &n) { return this->c[n]; }
- // return underlying data structure. called dangerous because it is.
- S dangerous() { return this->c; }
- };
-
- typedef a_priority_queue<ScoredLocation, vector<ScoredLocation>, ScoredLocationComp> Results;
-
- class FTSSearch {
- MONGO_DISALLOW_COPYING(FTSSearch);
- public:
-
- typedef std::map<Record*,double> Scores;
-
- FTSSearch( IndexDescriptor* descriptor,
- const FTSSpec& ftsSpec,
- const BSONObj& indexPrefix,
- const FTSQuery& query,
- const BSONObj& filter );
-
- void go(Results* results, unsigned limit );
-
- long long getKeysLookedAt() const { return _keysLookedAt; }
- long long getObjLookedAt() const { return _objectsLookedAt; }
-
- private:
-
- void _process( BtreeCursor* cursor );
-
- /**
- * checks not index pieces
- * i.e. prhases & negated terms
- */
- bool _ok( Record* record ) const;
-
- IndexDescriptor* _descriptor;
- const FTSSpec& _ftsSpec;
- BSONObj _indexPrefix;
- FTSQuery _query;
- FTSMatcher _ftsMatcher;
-
- scoped_ptr<CoveredIndexMatcher> _matcher;
-
- long long _keysLookedAt;
- long long _objectsLookedAt;
-
- Scores _scores;
-
- };
-
- } // namespace fts
-
-} // namespace mongo
-