summaryrefslogtreecommitdiff
path: root/3rdparty/clucene/src/CLucene/search/WildcardTermEnum.cpp
blob: bed9e6e0cf006d47b883bff35a1c0a7d29d24aba (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "WildcardTermEnum.h"

CL_NS_USE(index)
CL_NS_DEF(search)

    /**
     * Decides whether an enumerated term matches the wildcard pattern.
     *
     * The candidate must have the same field() value as the pattern term
     * (compared with ==) and must start with the literal prefix `pre`
     * (first `preLen` characters). If both hold, the remainder of the pattern
     * is matched against the candidate via wildcardEquals().
     *
     * When the candidate is NULL, its field differs, or the prefix does not
     * match, `_endEnum` is set so that endEnum() reports true from then on.
     * A candidate that shares the prefix but fails the wildcard match does
     * NOT set `_endEnum`.
     *
     * @param term candidate term; may be NULL
     * @return true if the candidate matches the pattern, false otherwise
     */
    bool WildcardTermEnum::termCompare(Term* term) {
        const bool sameField = (term != NULL) && (__term->field() == term->field());
        if (sameField) {
            const TCHAR* candidate = term->text();
            const TCHAR* wildcardPattern = __term->text();
            const bool sharesPrefix = (_tcsncmp(candidate, pre, preLen) == 0);
            if (sharesPrefix) {
                // Only the part of the pattern after the literal prefix still
                // needs matching; the candidate is scanned from offset preLen.
                return wildcardEquals(wildcardPattern + preLen, __term->textLength() - preLen, 0,
                                      candidate, term->textLength(), preLen);
            }
        }

        // Wrong field or prefix: flag the enumeration as finished.
        _endEnum = true;
        return false;
    }

    /**
     * Creates a new WildcardTermEnum.
     *
     * Splits the pattern text of `term` at its first wildcard character
     * (either LUCENE_WILDCARDTERMENUM_WILDCARD_STRING or
     * LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR, whichever occurs first). The text
     * before that position is kept in `pre` / `preLen` as the literal prefix,
     * and the underlying enumeration is positioned via reader->terms() on a
     * term built from that prefix.
     *
     * @param reader index reader used to obtain the underlying term enumeration
     * @param term   pattern term; its text is expected to contain at least one
     *               wildcard character (checked by CND_PRECONDITION)
     */
    WildcardTermEnum::WildcardTermEnum(IndexReader* reader, Term* term):
	    FilteredTermEnum(),
		__term(_CL_POINTER(term)),
		fieldMatch(false),
		_endEnum(false)
    {
		// Private, writable copy of the pattern text; truncated below to the prefix.
		pre = stringDuplicate(term->text());

		const TCHAR* sidx = _tcschr( pre, LUCENE_WILDCARDTERMENUM_WILDCARD_STRING );
		const TCHAR* cidx = _tcschr( pre, LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR );

		// Pick the earliest wildcard of either kind. A single-char wildcard at
		// the very first position (cidx == pre) is a valid hit and must take
		// part in the comparison; otherwise the prefix would wrongly contain
		// the wildcard character as a literal.
		const TCHAR* tidx = sidx;
		if (tidx == NULL)
			tidx = cidx;
		else if (cidx != NULL)
			tidx = min(sidx, cidx);
		CND_PRECONDITION(tidx != NULL, "tidx==NULL");
		int32_t idx = (int32_t)(tidx - pre);
		preLen = idx;
		CND_PRECONDITION(preLen<term->textLength(), "preLen >= term->textLength()");
		pre[preLen]=0; //trim end

		// Position the underlying enumeration using a term built from the
		// pattern term and the literal prefix; the temporary is released after.
		Term* t = _CLNEW Term(__term, pre);
		setEnum( reader->terms(t) );
		_CLDECDELETE(t);
  }

    /**
     * Releases the resources held by this enumeration.
     *
     * Calls FilteredTermEnum::close(), releases the pattern term and frees the
     * prefix buffer `pre`. `__term` doubles as the "already closed" marker: it
     * is reset to NULL here, so any later call returns immediately.
     */
    void WildcardTermEnum::close()
    {
       // Nothing left to release once the pattern term has been dropped.
       if ( __term == NULL )
         return;

       FilteredTermEnum::close();

       _CLDECDELETE(__term);
       __term = NULL;

       _CLDELETE_CARRAY( pre );
    }
    /** Destructor: delegates all cleanup to close(). */
    WildcardTermEnum::~WildcardTermEnum() {
      this->close();
    }

    /**
     * Returns the constant 1.0 for every term.
     * NOTE(review): presumably a per-term weighting factor consumed by the
     * caller -- confirm against FilteredTermEnum.
     */
    qreal WildcardTermEnum::difference() {
        return static_cast<qreal>(1.0f);
    }

    bool WildcardTermEnum::endEnum() {
        return _endEnum;
    }

    bool WildcardTermEnum::wildcardEquals(const TCHAR* pattern, int32_t patternLen, int32_t patternIdx, const TCHAR* str, int32_t strLen, int32_t stringIdx)
    {
        for (int32_t p = patternIdx; ; ++p)
        {
            for (int32_t s = stringIdx; ; ++p, ++s)
            {
                // End of str yet?
                bool sEnd = (s >= strLen);
                // End of pattern yet?
                bool pEnd = (p >= patternLen);

                // If we're looking at the end of the str...
                if (sEnd)
                {
                    // Assume the only thing left on the pattern is/are wildcards
                    bool justWildcardsLeft = true;

                    // Current wildcard position
                    int32_t wildcardSearchPos = p;
                    // While we haven't found the end of the pattern,
                	// and haven't encountered any non-wildcard characters
                    while (wildcardSearchPos < patternLen && justWildcardsLeft)
                    {
                        // Check the character at the current position
                        TCHAR wildchar = pattern[wildcardSearchPos];
                        // If it's not a wildcard character, then there is more
                  		// pattern information after this/these wildcards.

                        if (wildchar != LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR && 
                        		wildchar != LUCENE_WILDCARDTERMENUM_WILDCARD_STRING){
                            justWildcardsLeft = false;
                        }else{
                        	// to prevent "cat" matches "ca??"
							if (wildchar == LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR)
								return false;

                            wildcardSearchPos++; // Look at the next character
                        }
                    }

                    // This was a prefix wildcard search, and we've matched, so
                	// return true.
                    if (justWildcardsLeft)
	                	return true;
	            }
	
	            // If we've gone past the end of the str, or the pattern,
	            // return false.
	            if (sEnd || pEnd)
	                break;
	
	            // Match a single character, so continue.
				if (pattern[p] == LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR)
	                continue;

                if (pattern[p] == LUCENE_WILDCARDTERMENUM_WILDCARD_STRING)
                {
                    // Look at the character beyond the '*'.
                    ++p;
                    // Examine the str, starting at the last character.
					for (int32_t i = strLen; i >= s; --i)
					{
						if (wildcardEquals(pattern, patternLen, p, str, strLen, i))
							return true;
					}
                    break;
                }
        	if (pattern[p] != str[s])
                break;
        }
        return false;
      }
    }

CL_NS_END