diff options
Diffstat (limited to 'src/assistant/3rdparty/clucene/src/CLucene/analysis/AnalysisHeader.cpp')
-rw-r--r-- | src/assistant/3rdparty/clucene/src/CLucene/analysis/AnalysisHeader.cpp | 200 |
1 files changed, 200 insertions, 0 deletions
diff --git a/src/assistant/3rdparty/clucene/src/CLucene/analysis/AnalysisHeader.cpp b/src/assistant/3rdparty/clucene/src/CLucene/analysis/AnalysisHeader.cpp new file mode 100644 index 000000000..03f61a038 --- /dev/null +++ b/src/assistant/3rdparty/clucene/src/CLucene/analysis/AnalysisHeader.cpp @@ -0,0 +1,200 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "AnalysisHeader.h" +#include "CLucene/util/StringBuffer.h" + +CL_NS_USE(util) +CL_NS_DEF(analysis) + +const TCHAR* Token::defaultType=_T("word"); + +Token::Token(): + _startOffset (0), + _endOffset (0), + _type ( defaultType ), + positionIncrement (1) +{ + _termTextLen = 0; +#ifndef LUCENE_TOKEN_WORD_LENGTH + _termText = NULL; + bufferTextLen = 0; +#else + _termText[0] = 0; //make sure null terminated + bufferTextLen = LUCENE_TOKEN_WORD_LENGTH+1; +#endif +} + +Token::~Token(){ +#ifndef LUCENE_TOKEN_WORD_LENGTH + free(_termText); +#endif +} + +Token::Token(const TCHAR* text, const int32_t start, const int32_t end, const TCHAR* typ): + _startOffset (start), + _endOffset (end), + _type ( typ ), + positionIncrement (1) +{ + _termTextLen = 0; +#ifndef LUCENE_TOKEN_WORD_LENGTH + _termText = NULL; + bufferTextLen = 0; +#else + _termText[0] = 0; //make sure null terminated + bufferTextLen = LUCENE_TOKEN_WORD_LENGTH+1; +#endif + setText(text); +} + +void Token::set(const TCHAR* text, const int32_t start, const int32_t end, const TCHAR* typ){ + _startOffset = start; + _endOffset = end; + _type = typ; + positionIncrement = 1; + setText(text); +} + +void Token::setText(const TCHAR* text){ + _termTextLen = _tcslen(text); + +#ifndef LUCENE_TOKEN_WORD_LENGTH + growBuffer(_termTextLen+1); + _tcsncpy(_termText,text,_termTextLen+1); +#else + if ( _termTextLen > LUCENE_TOKEN_WORD_LENGTH ){ + //in the case where this occurs, we will leave the endOffset as it is + //since the actual word still occupies that space. + _termTextLen=LUCENE_TOKEN_WORD_LENGTH; + } + _tcsncpy(_termText,text,_termTextLen+1); +#endif + _termText[_termTextLen] = 0; //make sure null terminated +} + +void Token::growBuffer(size_t size){ + if(bufferTextLen>=size) + return; +#ifndef LUCENE_TOKEN_WORD_LENGTH + if ( _termText == NULL ) + _termText = (TCHAR*)malloc( size * sizeof(TCHAR) ); + else + _termText = (TCHAR*)realloc( _termText, size * sizeof(TCHAR) ); + bufferTextLen = size; +#else + _CLTHROWA(CL_ERR_TokenMgr,"Couldn't grow Token buffer"); +#endif +} + +void Token::setPositionIncrement(int32_t posIncr) { + if (posIncr < 0) { + _CLTHROWA(CL_ERR_IllegalArgument,"positionIncrement must be >= 0"); + } + positionIncrement = posIncr; +} + +int32_t Token::getPositionIncrement() const { return positionIncrement; } + +// Returns the Token's term text. +const TCHAR* Token::termText() const{ + return (const TCHAR*) _termText; +} +size_t Token::termTextLength() { + if ( _termTextLen == -1 ) //it was invalidated by growBuffer + _termTextLen = _tcslen(_termText); + return _termTextLen; +} +void Token::resetTermTextLen(){ + _termTextLen=-1; +} +bool Token::OrderCompare::operator()( Token* t1, Token* t2 ) const{ + if(t1->startOffset()>t2->startOffset()) + return false; + if(t1->startOffset()<t2->startOffset()) + return true; + return true; +} +TCHAR* Token::toString() const{ + StringBuffer sb; + sb.append(_T("(")); + sb.append( _termText ); + sb.append(_T(",")); + sb.appendInt( _startOffset ); + sb.append(_T(",")); + sb.appendInt( _endOffset ); + + if (!_tcscmp( _type, _T("word")) == 0 ){ + sb.append(_T(",type=")); + sb.append(_type); + } + if (positionIncrement != 1){ + sb.append(_T(",posIncr=")); + sb.appendInt(positionIncrement); + } + sb.append(_T(")")); + + return sb.toString(); +} + + +Token* TokenStream::next(){ + Token* t = _CLNEW Token; //deprecated + if ( !next(t) ) + _CLDELETE(t); + return t; +} + + +TokenFilter::TokenFilter(TokenStream* in, bool deleteTS): + input(in), + deleteTokenStream(deleteTS) +{ +} +TokenFilter::~TokenFilter(){ + close(); +} + +// Close the input TokenStream. +void TokenFilter::close() { + if ( input != NULL ){ + input->close(); + if ( deleteTokenStream ) + _CLDELETE( input ); + } + input = NULL; +} + + + +Tokenizer::Tokenizer() { + input = NULL; +} + +Tokenizer::Tokenizer(CL_NS(util)::Reader* _input): + input(_input) +{ +} + +void Tokenizer::close(){ + if (input != NULL) { + // ? delete input; + input = NULL; + } +} + +Tokenizer::~Tokenizer(){ + close(); +} + + +int32_t Analyzer::getPositionIncrementGap(const TCHAR* fieldName) +{ + return 0; +} + +CL_NS_END |