diff options
Diffstat (limited to 'src/assistant/3rdparty/clucene/src/CLucene/index/IndexReader.h')
-rw-r--r-- | src/assistant/3rdparty/clucene/src/CLucene/index/IndexReader.h | 485 |
1 files changed, 485 insertions, 0 deletions
diff --git a/src/assistant/3rdparty/clucene/src/CLucene/index/IndexReader.h b/src/assistant/3rdparty/clucene/src/CLucene/index/IndexReader.h new file mode 100644 index 000000000..352d16e80 --- /dev/null +++ b/src/assistant/3rdparty/clucene/src/CLucene/index/IndexReader.h @@ -0,0 +1,485 @@ +/* + * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team + * + * Distributable under the terms of either the Apache License (Version 2.0) or + * the GNU Lesser General Public License, as specified in the COPYING file. + * + * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved. +*/ +#ifndef _lucene_index_IndexReader_ +#define _lucene_index_IndexReader_ + +#if defined(_LUCENE_PRAGMA_ONCE) +# pragma once +#endif + +#include <QtCore/QString> + +#include "CLucene/store/Directory.h" +#include "CLucene/store/FSDirectory.h" +#include "CLucene/store/Lock.h" +#include "CLucene/document/Document.h" +#include "CLucene/index/TermVector.h" +#include "SegmentInfos.h" +#include "Terms.h" + + +CL_NS_DEF(index) + + +/** IndexReader is an abstract class, providing an interface for accessing an + index. Search of an index is done entirely through this abstract interface, + so that any subclass which implements it is searchable. + + <p> Concrete subclasses of IndexReader are usually constructed with a call to + one of the static <code>open()</code> methods, e.g. {@link #open(String)}. + + <p> For efficiency, in this API documents are often referred to via + <i>document numbers</i>, non-negative integers which each name a unique + document in the index. These document numbers are ephemeral--they may change + as documents are added to and deleted from an index. Clients should thus not + rely on a given document having the same number between sessions. + + <p> An IndexReader can be opened on a directory for which an IndexWriter is + opened already, but it cannot be used to delete documents from the index then. +*/ +class IndexReader : LUCENE_BASE +{ +public: + //Callback for classes that need to know if IndexReader is closing. + typedef void (*CloseCallback)(IndexReader*, void*); + + class CloseCallbackCompare:public CL_NS(util)::Compare::_base{ + public: + bool operator()( CloseCallback t1, CloseCallback t2 ) const{ + return t1 > t2; + } + static void doDelete(CloseCallback dummy){ + } + }; + + + enum FieldOption { + // all fields + ALL = 1, + // all indexed fields + INDEXED = 2, + // all fields which are not indexed + UNINDEXED = 4, + // all fields which are indexed with termvectors enables + INDEXED_WITH_TERMVECTOR = 8, + // all fields which are indexed but don't have termvectors enabled + INDEXED_NO_TERMVECTOR = 16, + // all fields where termvectors are enabled. Please note that only standard termvector fields are returned + TERMVECTOR = 32, + // all field with termvectors wiht positions enabled + TERMVECTOR_WITH_POSITION = 64, + // all fields where termvectors with offset position are set + TERMVECTOR_WITH_OFFSET = 128, + // all fields where termvectors with offset and position values set + TERMVECTOR_WITH_POSITION_OFFSET = 256 + }; + + +private: + bool stale; + bool hasChanges; + bool closeDirectory; + bool directoryOwner; + + SegmentInfos* segmentInfos; + CL_NS(store)::Directory* directory; + CL_NS(store)::LuceneLock* writeLock; + + typedef CL_NS(util)::CLSet<CloseCallback, void*, CloseCallbackCompare, + CloseCallbackCompare> CloseCallbackMap; + CloseCallbackMap closeCallbacks; + + /** Internal use. Implements commit */ + virtual void doCommit() = 0; + + /** + * Tries to acquire the WriteLock on this directory. + * this method is only valid if this IndexReader is directory owner. + * + * @throws IOException If WriteLock cannot be acquired. + */ + void aquireWriteLock(); +protected: + /** + * Constructor used if IndexReader is not owner of its directory. + * This is used for IndexReaders that are used within other IndexReaders that take care or locking directories. + * + * @param directory Directory where IndexReader files reside. + */ + IndexReader(CL_NS(store)::Directory* dir); + + /** + * Constructor used if IndexReader is owner of its directory. + * If IndexReader is owner of its directory, it locks its directory in case of write operations. + * + * @param directory Directory where IndexReader files reside. + * @param segmentInfos Used for write-l + * @param closeDirectory + */ + IndexReader(CL_NS(store)::Directory* directory, SegmentInfos* segmentInfos, bool closeDirectory); + + + /// Implements close. + virtual void doClose() = 0; + + /** Implements setNorm in subclass.*/ + virtual void doSetNorm(int32_t doc, const TCHAR* field, uint8_t value) = 0; + + /** Implements actual undeleteAll() in subclass. */ + virtual void doUndeleteAll() = 0; + + + /** Implements deletion of the document numbered <code>docNum</code>. + * Applications should call {@link #deleteDocument(int32_t)} or {@link #deleteDocuments(Term*)}. + */ + virtual void doDelete(const int32_t docNum) = 0; + +public: + + DEFINE_MUTEX(THIS_LOCK) + + ///Do not access this directly, only public so that MultiReader can access it + virtual void commit(); + + + /** Undeletes all documents currently marked as deleted in this index.*/ + void undeleteAll(); + + /** + * Get a list of unique field names that exist in this index and have the specified + * field option information. + * @param fldOption specifies which field option should be available for the returned fields + * @return Collection of Strings indicating the names of the fields. + * @see IndexReader.FieldOption + */ + virtual void getFieldNames(FieldOption fldOption, CL_NS(util)::StringArrayWithDeletor& retarray) = 0; + + _CL_DEPRECATED( getFieldNames(FieldOption, StringArrayWithDeletor&) ) virtual TCHAR** getFieldNames(); + _CL_DEPRECATED( getFieldNames(FieldOption, StringArrayWithDeletor&) ) virtual TCHAR** getFieldNames(bool indexed); + + /** Returns the byte-encoded normalization factor for the named field of + * every document. This is used by the search code to score documents. + * + * The number of bytes returned is the size of the IndexReader->maxDoc() + * MEMORY: The values are cached, so don't delete the returned byte array. + * @see Field#setBoost(qreal) + */ + virtual uint8_t* norms(const TCHAR* field) = 0; + + + /** Reads the byte-encoded normalization factor for the named field of every + * document. This is used by the search code to score documents. + * + * @see Field#setBoost(qreal) + */ + virtual void norms(const TCHAR* field, uint8_t* bytes) = 0; + + /** Expert: Resets the normalization factor for the named field of the named + * document. + * + * @see #norms(TCHAR*) + * @see Similarity#decodeNorm(uint8_t) + */ + void setNorm(int32_t doc, const TCHAR* field, qreal value); + + /** Expert: Resets the normalization factor for the named field of the named + * document. The norm represents the product of the field's {@link + * Field#setBoost(qreal) boost} and its {@link Similarity#lengthNorm(TCHAR*, + * int32_t) length normalization}. Thus, to preserve the length normalization + * values when resetting this, one should base the new value upon the old. + * + * @see #norms(TCHAR*) + * @see Similarity#decodeNorm(uint8_t) + */ + void setNorm(int32_t doc, const TCHAR* field, uint8_t value); + + /// Release the write lock, if needed. + virtual ~IndexReader(); + + /// Returns an IndexReader reading the index in an FSDirectory in the named path. + static IndexReader* open(const QString& path); + + /// Returns an IndexReader reading the index in the given Directory. + static IndexReader* open( CL_NS(store)::Directory* directory, bool closeDirectory=false); + + /** + * Returns the time the index in the named directory was last modified. + * Do not use this to check whether the reader is still up-to-date, use + * {@link #isCurrent()} instead. + */ + static uint64_t lastModified(const QString& directory); + + /** + * Returns the time the index in the named directory was last modified. + * Do not use this to check whether the reader is still up-to-date, use + * {@link #isCurrent()} instead. + */ + static uint64_t lastModified(const CL_NS(store)::Directory* directory); + + + /** + * Reads version number from segments files. The version number is + * initialized with a timestamp and then increased by one for each change of + * the index. + * + * @param directory where the index resides. + * @return version number. + * @throws IOException if segments file cannot be read + */ + static int64_t getCurrentVersion(CL_NS(store)::Directory* directory); + + /** + * Reads version number from segments files. The version number is + * initialized with a timestamp and then increased by one for each change of + * the index. + * + * @param directory where the index resides. + * @return version number. + * @throws IOException if segments file cannot be read + */ + static int64_t getCurrentVersion(const QString& directory); + + /** + * Version number when this IndexReader was opened. + */ + int64_t getVersion(); + + /** + * Check whether this IndexReader still works on a current version of the index. + * If this is not the case you will need to re-open the IndexReader to + * make sure you see the latest changes made to the index. + * + * @throws IOException + */ + bool isCurrent(); + + + /** + * Return an array of term frequency vectors for the specified document. + * The array contains a vector for each vectorized field in the document. + * Each vector contains terms and frequencies for all terms in a given vectorized field. + * If no such fields existed, the method returns null. The term vectors that are + * returned my either be of type TermFreqVector or of type TermPositionsVector if + * positions or offsets have been stored. + * + * @param docNumber document for which term frequency vectors are returned + * @return array of term frequency vectors. May be null if no term vectors have been + * stored for the specified document. + * @throws IOException if index cannot be accessed + * @see org.apache.lucene.document.Field.TermVector + */ + virtual bool getTermFreqVectors(int32_t docNumber, Array<TermFreqVector*>& result) =0; + + /** + * Return a term frequency vector for the specified document and field. The + * returned vector contains terms and frequencies for the terms in + * the specified field of this document, if the field had the storeTermVector + * flag set. If termvectors had been stored with positions or offsets, a + * TermPositionsVector is returned. + * + * @param docNumber document for which the term frequency vector is returned + * @param field field for which the term frequency vector is returned. + * @return term frequency vector May be null if field does not exist in the specified + * document or term vector was not stored. + * @throws IOException if index cannot be accessed + * @see org.apache.lucene.document.Field.TermVector + */ + virtual TermFreqVector* getTermFreqVector(int32_t docNumber, const TCHAR* field) = 0; + + /** + * Returns <code>true</code> if an index exists at the specified directory. + * If the directory does not exist or if there is no index in it. + * @param directory the directory to check for an index + * @return <code>true</code> if an index exists; <code>false</code> otherwise + */ + static bool indexExists(const QString& directory); + + /** + * Returns <code>true</code> if an index exists at the specified directory. + * If the directory does not exist or if there is no index in it. + * @param directory the directory to check for an index + * @return <code>true</code> if an index exists; <code>false</code> otherwise + * @throws IOException if there is a problem with accessing the index + */ + static bool indexExists(const CL_NS(store)::Directory* directory); + + /** Returns the number of documents in this index. */ + virtual int32_t numDocs() = 0; + + /** Returns one greater than the largest possible document number. + * This may be used to, e.g., determine how big to allocate an array which + * will have an element for every document number in an index. + */ + virtual int32_t maxDoc() const = 0; + + /** Gets the stored fields of the <code>n</code><sup>th</sup> + * <code>Document</code> in this index. + * The fields are not cleared before retrieving the document, so the + * object should be new or just cleared. + */ + virtual bool document(int32_t n, CL_NS(document)::Document*) =0; + + _CL_DEPRECATED( document(i, document) ) CL_NS(document)::Document* document(const int32_t n); + + /** Returns true if document <i>n</i> has been deleted */ + virtual bool isDeleted(const int32_t n) = 0; + + /** Returns true if any documents have been deleted */ + virtual bool hasDeletions() const = 0; + + /** Returns true if there are norms stored for this field. */ + virtual bool hasNorms(const TCHAR* field); + + /** Returns an enumeration of all the terms in the index. + * The enumeration is ordered by Term.compareTo(). Each term + * is greater than all that precede it in the enumeration. + * @memory Caller must clean up + */ + virtual TermEnum* terms() const =0; + + /** Returns an enumeration of all terms after a given term. + * The enumeration is ordered by Term.compareTo(). Each term + * is greater than all that precede it in the enumeration. + * @memory Caller must clean up + */ + virtual TermEnum* terms(const Term* t) const = 0; + + /** Returns the number of documents containing the term <code>t</code>. */ + virtual int32_t docFreq(const Term* t) const = 0; + + /* Returns an unpositioned TermPositions enumerator. + * @memory Caller must clean up + */ + virtual TermPositions* termPositions() const = 0; + + /** Returns an enumeration of all the documents which contain + * <code>term</code>. For each document, in addition to the document number + * and frequency of the term in that document, a list of all of the ordinal + * positions of the term in the document is available. Thus, this method + * implements the mapping: + * + * <p><ul> + * Term => <docNum, freq, + * <pos<sub>1</sub>, pos<sub>2</sub>, ... + * pos<sub>freq-1</sub>> + * ><sup>*</sup> + * </ul> + * <p> This positional information faciliates phrase and proximity searching. + * <p>The enumeration is ordered by document number. Each document number is + * greater than all that precede it in the enumeration. + * @memory Caller must clean up + */ + TermPositions* termPositions(Term* term) const; + + /** Returns an unpositioned {@link TermDocs} enumerator. + * @memory Caller must clean up + */ + virtual TermDocs* termDocs() const = 0; + + /** Returns an enumeration of all the documents which contain + * <code>term</code>. For each document, the document number, the frequency of + * the term in that document is also provided, for use in search scoring. + * Thus, this method implements the mapping: + * <p><ul>Term => <docNum, freq><sup>*</sup></ul> + * <p>The enumeration is ordered by document number. Each document number + * is greater than all that precede it in the enumeration. + * @memory Caller must clean up + */ + TermDocs* termDocs(Term* term) const; + + /** Deletes the document numbered <code>docNum</code>. Once a document is + * deleted it will not appear in TermDocs or TermPostitions enumerations. + * Attempts to read its field with the {@link #document} + * method will result in an error. The presence of this document may still be + * reflected in the {@link #docFreq} statistic, though + * this will be corrected eventually as the index is further modified. + */ + void deleteDocument(const int32_t docNum); + + ///@deprecated. Use deleteDocument instead. + _CL_DEPRECATED( deleteDocument ) void deleteDoc(const int32_t docNum) + { deleteDocument(docNum); } + + /** Deletes all documents containing <code>term</code>. + * This is useful if one uses a document field to hold a unique ID string for + * the document. Then to delete such a document, one merely constructs a + * term with the appropriate field and the unique ID string as its text and + * passes it to this method. + * See {@link #deleteDocument(int)} for information about when this deletion will + * become effective. + * @return the number of documents deleted + */ + int32_t deleteDocuments(Term* term); + + ///@deprecated. Use deleteDocuments instead. + _CL_DEPRECATED( deleteDocuments ) int32_t deleteTerm(Term* term){ return deleteDocuments(term); } + + /** + * Closes files associated with this index and also saves any new deletions to disk. + * No other methods should be called after this has been called. + */ + void close(); + + ///Checks if the index in the named directory is currently locked. + static bool isLocked(CL_NS(store)::Directory* directory); + + ///Checks if the index in the named directory is currently locked. + static bool isLocked(const QString& directory); + + + ///Forcibly unlocks the index in the named directory. + ///Caution: this should only be used by failure recovery code, + ///when it is known that no other process nor thread is in fact + ///currently accessing this index. + static void unlock(CL_NS(store)::Directory* directory); + static void unlock(const QString& path); + + /** Returns the directory this index resides in. */ + CL_NS(store)::Directory* getDirectory() { return directory; } + + /** Returns true if the file is a lucene filename (based on extension or filename) */ + static bool isLuceneFile(const QString& filename); + + /** + * For classes that need to know when the IndexReader closes (such as caches, etc), + * should pass their callback function to this. + */ + void addCloseCallback(CloseCallback callback, void* parameter); + +protected: + class LockWith : public CL_NS(store)::LuceneLockWith<IndexReader*> + { + public: + LockWith(CL_NS(store)::LuceneLock* lock, CL_NS(store)::Directory* dir); + + //Reads the segmentinfo file and depending on the number of segments found + //it returns a MultiReader or a SegmentReader + IndexReader* doBody(); + + private: + CL_NS(store)::Directory* directory; + }; + friend class IndexReader::LockWith; + + class CommitLockWith : public CL_NS(store)::LuceneLockWith<void> + { + public: + CommitLockWith(CL_NS(store)::LuceneLock* lock, IndexReader* r); + void doBody(); + + private: + IndexReader* reader; + }; + friend class IndexReader::CommitLockWith; +}; + +CL_NS_END +#endif + + |