summaryrefslogtreecommitdiff
path: root/src/backend/tsearch/wparser_def.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/tsearch/wparser_def.c')
-rw-r--r--src/backend/tsearch/wparser_def.c173
1 files changed, 99 insertions, 74 deletions
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 9a351ac9da..e5c4d7cb04 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -1942,9 +1942,10 @@ prsd_end(PG_FUNCTION_ARGS)
#define INTERESTINGWORD(j) \
(prs->words[j].item && !prs->words[j].repeated)
-/* Don't want to end at a non-word or a short word */
+/* Don't want to end at a non-word or a short word, unless interesting */
#define BADENDPOINT(j) \
- (NOENDTOKEN(prs->words[j].type) || prs->words[j].len <= shortword)
+ ((NOENDTOKEN(prs->words[j].type) || prs->words[j].len <= shortword) && \
+ !INTERESTINGWORD(j))
typedef struct
{
@@ -2003,75 +2004,97 @@ checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
return false;
}
-
-static bool
-hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q)
+/*
+ * hlFirstIndex: find first index >= pos containing any word used in query
+ *
+ * Returns -1 if no such index
+ */
+static int
+hlFirstIndex(HeadlineParsedText *prs, TSQuery query, int pos)
{
- int i,
- j;
- QueryItem *item = GETQUERY(query);
- int pos = *p;
-
- *q = -1;
- *p = INT_MAX;
+ int i;
- for (j = 0; j < query->size; j++)
+ /* For each word ... */
+ for (i = pos; i < prs->curwords; i++)
{
- if (item->type != QI_VAL)
+ /* ... scan the query to see if this word matches any operand */
+ QueryItem *item = GETQUERY(query);
+ int j;
+
+ for (j = 0; j < query->size; j++)
{
+ if (item->type == QI_VAL &&
+ prs->words[i].item == &item->qoperand)
+ return i;
item++;
- continue;
- }
- for (i = pos; i < prs->curwords; i++)
- {
- if (prs->words[i].item == &item->qoperand)
- {
- if (i > *q)
- *q = i;
- break;
- }
}
- item++;
}
+ return -1;
+}
- if (*q < 0)
- return false;
+/*
+ * hlCover: try to find a substring of prs' word list that satisfies query
+ *
+ * At entry, *p must be the first word index to consider (initialize this to
+ * zero, or to the next index after a previous successful search).
+ *
+ * On success, sets *p to first word index and *q to last word index of the
+ * cover substring, and returns true.
+ *
+ * The result is a minimal cover, in the sense that both *p and *q will be
+ * words used in the query.
+ */
+static bool
+hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q)
+{
+ int pmin,
+ pmax,
+ nextpmin,
+ nextpmax;
+ hlCheck ch;
- item = GETQUERY(query);
- for (j = 0; j < query->size; j++)
+ /*
+ * We look for the earliest, shortest substring of prs->words that
+ * satisfies the query. Both the pmin and pmax indices must be words
+ * appearing in the query; there's no point in trying endpoints in between
+ * such points.
+ */
+ pmin = hlFirstIndex(prs, query, *p);
+ while (pmin >= 0)
{
- if (item->type != QI_VAL)
+ /* This useless assignment just keeps stupider compilers quiet */
+ nextpmin = -1;
+ /* Consider substrings starting at pmin */
+ ch.words = &(prs->words[pmin]);
+ /* Consider the length-one substring first, then longer substrings */
+ pmax = pmin;
+ do
{
- item++;
- continue;
- }
- for (i = *q; i >= pos; i--)
- {
- if (prs->words[i].item == &item->qoperand)
+ /* Try to match query against pmin .. pmax substring */
+ ch.len = pmax - pmin + 1;
+ if (TS_execute(GETQUERY(query), &ch,
+ TS_EXEC_EMPTY, checkcondition_HL))
{
- if (i < *p)
- *p = i;
- break;
+ *p = pmin;
+ *q = pmax;
+ return true;
}
- }
- item++;
- }
-
- if (*p <= *q)
- {
- hlCheck ch;
+ /* Nope, so advance pmax to next feasible endpoint */
+ nextpmax = hlFirstIndex(prs, query, pmax + 1);
- ch.words = &(prs->words[*p]);
- ch.len = *q - *p + 1;
- if (TS_execute(GETQUERY(query), &ch, TS_EXEC_EMPTY, checkcondition_HL))
- return true;
- else
- {
- (*p)++;
- return hlCover(prs, query, p, q);
+ /*
+ * If this is our first advance past pmin, then the result is also
+ * the next feasible value of pmin; remember it to save a
+ * redundant search.
+ */
+ if (pmax == pmin)
+ nextpmin = nextpmax;
+ pmax = nextpmax;
}
+ while (pmax >= 0);
+ /* No luck here, so try next feasible startpoint */
+ pmin = nextpmin;
}
-
return false;
}
@@ -2357,11 +2380,12 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall,
int bestb = -1,
beste = -1;
int bestlen = -1;
+ bool bestcover = false;
int pose,
posb,
poslen,
curlen;
-
+ bool poscover;
int i;
if (!highlightall)
@@ -2387,14 +2411,6 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall,
pose = i;
}
- /* XXX this optimization seems unnecessary and wrong */
- if (poslen < bestlen && !BADENDPOINT(beste))
- {
- /* better cover already found, so try next cover */
- p++;
- continue;
- }
-
if (curlen < max_words)
{
/*
@@ -2449,29 +2465,38 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall,
i = q;
for (; curlen > min_words; i--)
{
+ if (!BADENDPOINT(i))
+ break;
if (!NONWORDTOKEN(prs->words[i].type))
curlen--;
if (INTERESTINGWORD(i))
poslen--;
- pose = i;
- if (!BADENDPOINT(i))
- break;
+ pose = i - 1;
}
}
/*
- * Adopt this headline if it's the first, or if it has more
- * interesting words and isn't ending at a bad endpoint, or if it
- * replaces a bad endpoint with a good one (XXX even if it has
- * fewer interesting words? Really?)
+ * Check whether the proposed headline includes the original
+ * cover; it might not if we trimmed it due to max_words.
+ */
+ poscover = (posb <= p && pose >= q);
+
+ /*
+ * Adopt this headline if it's better than the last one, giving
+ * highest priority to headlines including the cover, then to
+ * headlines with more interesting words, then to headlines with
+ * good stopping points. (Since bestlen is initially -1, we will
+ * certainly adopt the first headline.)
*/
- if (bestlen < 0 ||
- (poslen > bestlen && !BADENDPOINT(pose)) ||
- (!BADENDPOINT(pose) && BADENDPOINT(beste)))
+ if (poscover > bestcover ||
+ (poscover == bestcover && poslen > bestlen) ||
+ (poscover == bestcover && poslen == bestlen &&
+ !BADENDPOINT(pose) && BADENDPOINT(beste)))
{
bestb = posb;
beste = pose;
bestlen = poslen;
+ bestcover = poscover;
}
/* move p to generate the next cover */