// fts_element_iterator.cpp /** * Copyright (C) 2014 MongoDB Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . * * As a special exception, the copyright holders give permission to link the * code of portions of this program with the OpenSSL library under certain * conditions as described in each individual source file and distribute * linked combinations including the program with the OpenSSL library. You * must comply with the GNU Affero General Public License in all respects for * all of the code used other than as permitted herein. If you modify file(s) * with this exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do so, * delete this exception statement from your version. If you delete this * exception statement from all source files in the program, then also delete * it in the license file. */ #include "mongo/db/fts/fts_element_iterator.h" #include "mongo/db/fts/fts_spec.h" #include "mongo/db/fts/fts_util.h" #include "mongo/util/mongoutils/str.h" #include "mongo/util/stringutils.h" #include namespace mongo { namespace fts { using std::string; extern const double DEFAULT_WEIGHT; extern const double MAX_WEIGHT; std::ostream& operator<<( std::ostream& os, FTSElementIterator::FTSIteratorFrame& frame ) { BSONObjIterator it = frame._it; return os << "FTSIteratorFrame[" " element=" << (*it).toString() << ", _language=" << frame._language->str() << ", _parentPath=" << frame._parentPath << ", _isArray=" << frame._isArray << "]"; } FTSElementIterator::FTSElementIterator( const FTSSpec& spec, const BSONObj& obj ) : _frame( obj, spec, &spec.defaultLanguage(), "", false ), _spec( spec ), _currentValue( advance() ) { } namespace { /** Check for exact match or path prefix match. */ inline bool _matchPrefix( const string& dottedName, const string& weight ) { if ( weight == dottedName ) { return true; } return mongoutils::str::startsWith( weight, dottedName + '.' ); } } bool FTSElementIterator::more() { //_currentValue = advance(); return _currentValue.valid(); } FTSIteratorValue FTSElementIterator::next() { FTSIteratorValue result = _currentValue; _currentValue = advance(); return result; } /** * Helper method: * if (current object iterator not exhausted) return true; * while (frame stack not empty) { * resume object iterator popped from stack; * if (resumed iterator not exhausted) return true; * } * return false; */ bool FTSElementIterator::moreFrames() { if (_frame._it.more()) return true; while (!_frameStack.empty()) { _frame = _frameStack.top(); _frameStack.pop(); if (_frame._it.more()) { return true; } } return false; } FTSIteratorValue FTSElementIterator::advance() { while ( moreFrames() ) { BSONElement elem = _frame._it.next(); string fieldName = elem.fieldName(); // Skip "language" specifier fields if wildcard. if ( _spec.wildcard() && _spec.languageOverrideField() == fieldName ) { continue; } // Compose the dotted name of the current field: // 1. parent path empty (top level): use the current field name // 2. parent path non-empty and obj is an array: use the parent path // 3. parent path non-empty and obj is a sub-doc: append field name to parent path string dottedName = ( _frame._parentPath.empty() ? fieldName : _frame._isArray ? _frame._parentPath : _frame._parentPath + '.' + fieldName ); // Find lower bound of dottedName in _weights. lower_bound leaves us at the first // weight that could possibly match or be a prefix of dottedName. And if this // element fails to match, then no subsequent weight can match, since the weights // are lexicographically ordered. Weights::const_iterator i = _spec.weights().lower_bound( elem.type() == Object ? dottedName + '.' : dottedName ); // possibleWeightMatch is set if the weight map contains either a match or some item // lexicographically larger than fieldName. This boolean acts as a guard on // dereferences of iterator 'i'. bool possibleWeightMatch = ( i != _spec.weights().end() ); // Optimize away two cases, when not wildcard: // 1. lower_bound seeks to end(): no prefix match possible // 2. lower_bound seeks to a name which is not a prefix if ( !_spec.wildcard() ) { if ( !possibleWeightMatch ) { continue; } else if ( !_matchPrefix( dottedName, i->first ) ) { continue; } } // Is the current field an exact match on a weight? bool exactMatch = ( possibleWeightMatch && i->first == dottedName ); double weight = ( possibleWeightMatch ? i->second : DEFAULT_WEIGHT ); switch ( elem.type() ) { case String: // Only index strings on exact match or wildcard. if ( exactMatch || _spec.wildcard() ) { return FTSIteratorValue( elem.valuestr(), _frame._language, weight ); } break; case Object: // Only descend into a sub-document on proper prefix or wildcard. Note that // !exactMatch is a sufficient test for proper prefix match, because of // if ( !matchPrefix( dottedName, i->first ) ) continue; // block above. if ( !exactMatch || _spec.wildcard() ) { _frameStack.push( _frame ); _frame = FTSIteratorFrame( elem.Obj(), _spec, _frame._language, dottedName, false ); } break; case Array: // Only descend into arrays from non-array parents or on wildcard. if ( !_frame._isArray || _spec.wildcard() ) { _frameStack.push( _frame ); _frame = FTSIteratorFrame( elem.Obj(), _spec, _frame._language, dottedName, true ); } break; default: // Skip over all other BSON types. break; } } return FTSIteratorValue(); // valid()==false } } // namespace fts } // namespace mongo