diff options
Diffstat (limited to 'src/assistant/3rdparty/clucene/src/CLucene/search/BooleanQuery.cpp')
-rw-r--r-- | src/assistant/3rdparty/clucene/src/CLucene/search/BooleanQuery.cpp | 363 |
1 files changed, 363 insertions, 0 deletions
diff --git a/src/assistant/3rdparty/clucene/src/CLucene/search/BooleanQuery.cpp b/src/assistant/3rdparty/clucene/src/CLucene/search/BooleanQuery.cpp new file mode 100644 index 000000000..3fd36d847 --- /dev/null +++ b/src/assistant/3rdparty/clucene/src/CLucene/search/BooleanQuery.cpp @@ -0,0 +1,363 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "BooleanQuery.h" + +#include "BooleanClause.h" +#include "CLucene/index/IndexReader.h" +#include "CLucene/util/StringBuffer.h" +#include "CLucene/util/Arrays.h" +#include "SearchHeader.h" +#include "BooleanScorer.h" +#include "Scorer.h" + +CL_NS_USE(index) +CL_NS_USE(util) +CL_NS_DEF(search) + + BooleanQuery::BooleanQuery(): + clauses(true) + { + } + + BooleanQuery::BooleanQuery(const BooleanQuery& clone): + Query(clone) + { + for ( uint32_t i=0;i<clone.clauses.size();i++ ){ + BooleanClause* clause = clone.clauses[i]->clone(); + clause->deleteQuery=true; + add(clause); + } + } + + BooleanQuery::~BooleanQuery(){ + clauses.clear(); + } + + size_t BooleanQuery::hashCode() const { + //todo: do cachedHashCode, and invalidate on add/remove clause + size_t ret = 0; + for (uint32_t i = 0 ; i < clauses.size(); i++) { + BooleanClause* c = clauses[i]; + ret = 31 * ret + c->hashCode(); + } + ret = ret ^ Similarity::floatToByte(getBoost()); + return ret; + } + + const TCHAR* BooleanQuery::getQueryName() const{ + return getClassName(); + } + const TCHAR* BooleanQuery::getClassName(){ + return _T("BooleanQuery"); + } + + /** + * Default value is 1024. Use <code>org.apache.lucene.maxClauseCount</code> + * system property to override. + */ + size_t BooleanQuery::maxClauseCount = LUCENE_BOOLEANQUERY_MAXCLAUSECOUNT; + size_t BooleanQuery::getMaxClauseCount(){ + return maxClauseCount; + } + + void BooleanQuery::setMaxClauseCount(size_t maxClauseCount){ + BooleanQuery::maxClauseCount = maxClauseCount; + } + + void BooleanQuery::add(Query* query, const bool deleteQuery, const bool required, const bool prohibited) { + BooleanClause* bc = _CLNEW BooleanClause(query,deleteQuery,required, prohibited); + try{ + add(bc); + }catch(...){ + _CLDELETE(bc); + throw; + } + } + + void BooleanQuery::add(BooleanClause* clause) { + if (clauses.size() >= getMaxClauseCount()) + _CLTHROWA(CL_ERR_TooManyClauses,"Too Many Clauses"); + + clauses.push_back(clause); + } + + + size_t BooleanQuery::getClauseCount() const { + return (int32_t) clauses.size(); + } + + TCHAR* BooleanQuery::toString(const TCHAR* field) const{ + StringBuffer buffer; + if (getBoost() != 1.0) { + buffer.append(_T("(")); + } + + for (uint32_t i = 0 ; i < clauses.size(); i++) { + BooleanClause* c = clauses[i]; + if (c->prohibited) + buffer.append(_T("-")); + else if (c->required) + buffer.append(_T("+")); + + if ( c->query->instanceOf(BooleanQuery::getClassName()) ) { // wrap sub-bools in parens + buffer.append(_T("(")); + + TCHAR* buf = c->query->toString(field); + buffer.append(buf); + _CLDELETE_CARRAY( buf ); + + buffer.append(_T(")")); + } else { + TCHAR* buf = c->query->toString(field); + buffer.append(buf); + _CLDELETE_CARRAY( buf ); + } + if (i != clauses.size()-1) + buffer.append(_T(" ")); + + if (getBoost() != 1.0) { + buffer.append(_T(")^")); + buffer.appendFloat(getBoost(),1); + } + } + return buffer.toString(); + } + + + + + BooleanClause** BooleanQuery::getClauses() const + { + CND_MESSAGE(false, "Warning: BooleanQuery::getClauses() is deprecated") + BooleanClause** ret = _CL_NEWARRAY(BooleanClause*, clauses.size()+1); + getClauses(ret); + return ret; + } + + void BooleanQuery::getClauses(BooleanClause** ret) const + { + size_t size=clauses.size(); + for ( uint32_t i=0;i<size;i++ ) + ret[i] = clauses[i]; + } + Query* BooleanQuery::rewrite(IndexReader* reader) { + if (clauses.size() == 1) { // optimize 1-clause queries + BooleanClause* c = clauses[0]; + if (!c->prohibited) { // just return clause + Query* query = c->query->rewrite(reader); // rewrite first + + //if the query doesn't actually get re-written, + //then return a clone (because the BooleanQuery + //will register different to the returned query. + if ( query == c->query ) + query = query->clone(); + + if (getBoost() != 1.0f) { // incorporate boost + query->setBoost(getBoost() * query->getBoost()); + } + + return query; + } + } + + BooleanQuery* clone = NULL; // recursively rewrite + for (uint32_t i = 0 ; i < clauses.size(); i++) { + BooleanClause* c = clauses[i]; + Query* query = c->query->rewrite(reader); + if (query != c->query) { // clause rewrote: must clone + if (clone == NULL) + clone = (BooleanQuery*)this->clone(); + //todo: check if delete query should be on... + //in fact we should try and get rid of these + //for compatibility sake + clone->clauses.set (i, _CLNEW BooleanClause(query, true, c->required, c->prohibited)); + } + } + if (clone != NULL) { + return clone; // some clauses rewrote + } else + return this; // no clauses rewrote + } + + + Query* BooleanQuery::clone() const{ + BooleanQuery* clone = _CLNEW BooleanQuery(*this); + return clone; + } + + /** Returns true iff <code>o</code> is equal to this. */ + bool BooleanQuery::equals(Query* o)const { + if (!(o->instanceOf(BooleanQuery::getClassName()))) + return false; + const BooleanQuery* other = (BooleanQuery*)o; + + bool ret = (this->getBoost() == other->getBoost()); + if ( ret ){ + CLListEquals<BooleanClause,BooleanClause::Compare, const ClausesType, const ClausesType> comp; + ret = comp.equals(&this->clauses,&other->clauses); + } + return ret; + } + + qreal BooleanQuery::BooleanWeight::getValue() { return parentQuery->getBoost(); } + Query* BooleanQuery::BooleanWeight::getQuery() { return (Query*)parentQuery; } + + + + + + BooleanQuery::BooleanWeight::BooleanWeight(Searcher* searcher, + CLVector<BooleanClause*,Deletor::Object<BooleanClause> >* clauses, BooleanQuery* parentQuery) + { + this->searcher = searcher; + this->parentQuery = parentQuery; + this->clauses = clauses; + for (uint32_t i = 0 ; i < clauses->size(); i++) { + weights.push_back((*clauses)[i]->query->_createWeight(searcher)); + } + } + BooleanQuery::BooleanWeight::~BooleanWeight(){ + this->weights.clear(); + } + + qreal BooleanQuery::BooleanWeight::sumOfSquaredWeights() { + qreal sum = 0.0f; + for (uint32_t i = 0 ; i < weights.size(); i++) { + BooleanClause* c = (*clauses)[i]; + Weight* w = weights[i]; + if (!c->prohibited) + sum += w->sumOfSquaredWeights(); // sum sub weights + } + sum *= parentQuery->getBoost() * parentQuery->getBoost(); // boost each sub-weight + return sum ; + } + + void BooleanQuery::BooleanWeight::normalize(qreal norm) { + norm *= parentQuery->getBoost(); // incorporate boost + for (uint32_t i = 0 ; i < weights.size(); i++) { + BooleanClause* c = (*clauses)[i]; + Weight* w = weights[i]; + if (!c->prohibited) + w->normalize(norm); + } + } + + Scorer* BooleanQuery::BooleanWeight::scorer(IndexReader* reader){ + // First see if the (faster) ConjunctionScorer will work. This can be + // used when all clauses are required. Also, at this point a + // BooleanScorer cannot be embedded in a ConjunctionScorer, as the hits + // from a BooleanScorer are not always sorted by document number (sigh) + // and hence BooleanScorer cannot implement skipTo() correctly, which is + // required by ConjunctionScorer. + bool allRequired = true; + bool noneBoolean = true; + { //msvc6 scope fix + for (uint32_t i = 0 ; i < weights.size(); i++) { + BooleanClause* c = (*clauses)[i]; + if (!c->required) + allRequired = false; + if (c->query->instanceOf(BooleanQuery::getClassName())) + noneBoolean = false; + } + } + + if (allRequired && noneBoolean) { // ConjunctionScorer is okay + ConjunctionScorer* result = + _CLNEW ConjunctionScorer(parentQuery->getSimilarity(searcher)); + for (uint32_t i = 0 ; i < weights.size(); i++) { + Weight* w = weights[i]; + Scorer* subScorer = w->scorer(reader); + if (subScorer == NULL) + return NULL; + result->add(subScorer); + } + return result; + } + + // Use good-old BooleanScorer instead. + BooleanScorer* result = _CLNEW BooleanScorer(parentQuery->getSimilarity(searcher)); + + { //msvc6 scope fix + for (uint32_t i = 0 ; i < weights.size(); i++) { + BooleanClause* c = (*clauses)[i]; + Weight* w = weights[i]; + Scorer* subScorer = w->scorer(reader); + if (subScorer != NULL) + result->add(subScorer, c->required, c->prohibited); + else if (c->required) + return NULL; + } + } + + return result; + } + + void BooleanQuery::BooleanWeight::explain(IndexReader* reader, int32_t doc, Explanation* result){ + int32_t coord = 0; + int32_t maxCoord = 0; + qreal sum = 0.0f; + Explanation* sumExpl = _CLNEW Explanation; + for (uint32_t i = 0 ; i < weights.size(); i++) { + BooleanClause* c = (*clauses)[i]; + Weight* w = weights[i]; + Explanation* e = _CLNEW Explanation; + w->explain(reader, doc, e); + if (!c->prohibited) + maxCoord++; + if (e->getValue() > 0) { + if (!c->prohibited) { + sumExpl->addDetail(e); + sum += e->getValue(); + coord++; + e = NULL; //prevent e from being deleted + } else { + //we want to return something else... + _CLDELETE(sumExpl); + result->setValue(0.0f); + result->setDescription(_T("match prohibited")); + return; + } + } else if (c->required) { + _CLDELETE(sumExpl); + result->setValue(0.0f); + result->setDescription(_T("match prohibited")); + return; + } + + _CLDELETE(e); + } + sumExpl->setValue(sum); + + if (coord == 1){ // only one clause matched + Explanation* tmp = sumExpl; + sumExpl = sumExpl->getDetail(0)->clone(); // eliminate wrapper + _CLDELETE(tmp); + } + + sumExpl->setDescription(_T("sum of:")); + qreal coordFactor = parentQuery->getSimilarity(searcher)->coord(coord, maxCoord); + if (coordFactor == 1.0f){ // coord is no-op + result->set(*sumExpl); // eliminate wrapper + _CLDELETE(sumExpl); + } else { + result->setDescription( _T("product of:")); + result->addDetail(sumExpl); + + StringBuffer explbuf; + explbuf.append(_T("coord(")); + explbuf.appendInt(coord); + explbuf.append(_T("/")); + explbuf.appendInt(maxCoord); + explbuf.append(_T(")")); + result->addDetail(_CLNEW Explanation(coordFactor, explbuf.getBuffer())); + result->setValue(sum*coordFactor); + } + } + + +CL_NS_END |