summaryrefslogtreecommitdiff
path: root/src/backend/access/gin/ginscan.c
diff options
context:
space:
mode:
authorAlexander Korotkov <akorotkov@postgresql.org>2020-01-18 01:11:39 +0300
committerAlexander Korotkov <akorotkov@postgresql.org>2020-01-18 01:11:39 +0300
commit4b754d6c16e16cc1a1adf12ab0f48603069a0efd (patch)
treec976cb8180007dd33bcf9f48d8a24b490c03605e /src/backend/access/gin/ginscan.c
parent41c6f9db25b5e3a8bb8afbb7d6715cff541fd41e (diff)
downloadpostgresql-4b754d6c16e16cc1a1adf12ab0f48603069a0efd.tar.gz
Avoid full scan of GIN indexes when possible
The strategy of GIN index scan is driven by opclass-specific extract_query method. This method that needed search mode is GIN_SEARCH_MODE_ALL. This mode means that matching tuple may contain none of extracted entries. Simple example is '!term' tsquery, which doesn't need any term to exist in matching tsvector. In order to handle such scan key GIN calculates virtual entry, which contains all TIDs of all entries of attribute. In fact this is full scan of index attribute. And typically this is very slow, but allows to handle some queries correctly in GIN. However, current algorithm calculate such virtual entry for each GIN_SEARCH_MODE_ALL scan key even if they are multiple for the same attribute. This is clearly not optimal. This commit improves the situation by introduction of "exclude only" scan keys. Such scan keys are not capable to return set of matching TIDs. Instead, they are capable only to filter TIDs produced by normal scan keys. Therefore, each attribute should contain at least one normal scan key, while rest of them may be "exclude only" if search mode is GIN_SEARCH_MODE_ALL. The same optimization might be applied to the whole scan, not per-attribute. But that leads to NULL values elimination problem. There is trade-off between multiple possible ways to do this. We probably want to do this later using some cost-based decision algorithm. Discussion: https://postgr.es/m/CAOBaU_YGP5-BEt5Cc0%3DzMve92vocPzD%2BXiZgiZs1kjY0cj%3DXBg%40mail.gmail.com Author: Nikita Glukhov, Alexander Korotkov, Tom Lane, Julien Rouhaud Reviewed-by: Julien Rouhaud, Tomas Vondra, Tom Lane
Diffstat (limited to 'src/backend/access/gin/ginscan.c')
-rw-r--r--src/backend/access/gin/ginscan.c127
1 files changed, 78 insertions, 49 deletions
diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c
index c15d06ceba..0a685bdbfc 100644
--- a/src/backend/access/gin/ginscan.c
+++ b/src/backend/access/gin/ginscan.c
@@ -126,6 +126,27 @@ ginFillScanEntry(GinScanOpaque so, OffsetNumber attnum,
}
/*
+ * Append hidden scan entry of given category to the scan key.
+ *
+ * NB: this had better be called at most once per scan key, since
+ * ginFillScanKey leaves room for only one hidden entry. Currently,
+ * it seems sufficiently clear that this is true that we don't bother
+ * with any cross-check logic.
+ */
+static void
+ginScanKeyAddHiddenEntry(GinScanOpaque so, GinScanKey key,
+ GinNullCategory queryCategory)
+{
+ int i = key->nentries++;
+
+ /* strategy is of no interest because this is not a partial-match item */
+ key->scanEntry[i] = ginFillScanEntry(so, key->attnum,
+ InvalidStrategy, key->searchMode,
+ (Datum) 0, queryCategory,
+ false, NULL);
+}
+
+/*
* Initialize the next GinScanKey using the output from the extractQueryFn
*/
static void
@@ -137,17 +158,16 @@ ginFillScanKey(GinScanOpaque so, OffsetNumber attnum,
{
GinScanKey key = &(so->keys[so->nkeys++]);
GinState *ginstate = &so->ginstate;
- uint32 nUserQueryValues = nQueryValues;
uint32 i;
- /* Non-default search modes add one "hidden" entry to each key */
- if (searchMode != GIN_SEARCH_MODE_DEFAULT)
- nQueryValues++;
key->nentries = nQueryValues;
- key->nuserentries = nUserQueryValues;
+ key->nuserentries = nQueryValues;
- key->scanEntry = (GinScanEntry *) palloc(sizeof(GinScanEntry) * nQueryValues);
- key->entryRes = (GinTernaryValue *) palloc0(sizeof(GinTernaryValue) * nQueryValues);
+ /* Allocate one extra array slot for possible "hidden" entry */
+ key->scanEntry = (GinScanEntry *) palloc(sizeof(GinScanEntry) *
+ (nQueryValues + 1));
+ key->entryRes = (GinTernaryValue *) palloc0(sizeof(GinTernaryValue) *
+ (nQueryValues + 1));
key->query = query;
key->queryValues = queryValues;
@@ -157,6 +177,12 @@ ginFillScanKey(GinScanOpaque so, OffsetNumber attnum,
key->searchMode = searchMode;
key->attnum = attnum;
+ /*
+ * Initially, scan keys of GIN_SEARCH_MODE_ALL mode are marked
+ * excludeOnly. This might get changed later.
+ */
+ key->excludeOnly = (searchMode == GIN_SEARCH_MODE_ALL);
+
ItemPointerSetMin(&key->curItem);
key->curItemMatches = false;
key->recheckCurItem = false;
@@ -168,6 +194,7 @@ ginFillScanKey(GinScanOpaque so, OffsetNumber attnum,
ginInitConsistentFunction(ginstate, key);
+ /* Set up normal scan entries using extractQueryFn's outputs */
for (i = 0; i < nQueryValues; i++)
{
Datum queryKey;
@@ -175,54 +202,28 @@ ginFillScanKey(GinScanOpaque so, OffsetNumber attnum,
bool isPartialMatch;
Pointer this_extra;
- if (i < nUserQueryValues)
- {
- /* set up normal entry using extractQueryFn's outputs */
- queryKey = queryValues[i];
- queryCategory = queryCategories[i];
- isPartialMatch =
- (ginstate->canPartialMatch[attnum - 1] && partial_matches)
- ? partial_matches[i] : false;
- this_extra = (extra_data) ? extra_data[i] : NULL;
- }
- else
- {
- /* set up hidden entry */
- queryKey = (Datum) 0;
- switch (searchMode)
- {
- case GIN_SEARCH_MODE_INCLUDE_EMPTY:
- queryCategory = GIN_CAT_EMPTY_ITEM;
- break;
- case GIN_SEARCH_MODE_ALL:
- queryCategory = GIN_CAT_EMPTY_QUERY;
- break;
- case GIN_SEARCH_MODE_EVERYTHING:
- queryCategory = GIN_CAT_EMPTY_QUERY;
- break;
- default:
- elog(ERROR, "unexpected searchMode: %d", searchMode);
- queryCategory = 0; /* keep compiler quiet */
- break;
- }
- isPartialMatch = false;
- this_extra = NULL;
-
- /*
- * We set the strategy to a fixed value so that ginFillScanEntry
- * can combine these entries for different scan keys. This is
- * safe because the strategy value in the entry struct is only
- * used for partial-match cases. It's OK to overwrite our local
- * variable here because this is the last loop iteration.
- */
- strategy = InvalidStrategy;
- }
+ queryKey = queryValues[i];
+ queryCategory = queryCategories[i];
+ isPartialMatch =
+ (ginstate->canPartialMatch[attnum - 1] && partial_matches)
+ ? partial_matches[i] : false;
+ this_extra = (extra_data) ? extra_data[i] : NULL;
key->scanEntry[i] = ginFillScanEntry(so, attnum,
strategy, searchMode,
queryKey, queryCategory,
isPartialMatch, this_extra);
}
+
+ /*
+ * For GIN_SEARCH_MODE_INCLUDE_EMPTY and GIN_SEARCH_MODE_EVERYTHING search
+ * modes, we add the "hidden" entry immediately. GIN_SEARCH_MODE_ALL is
+ * handled later, since we might be able to omit the hidden entry for it.
+ */
+ if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
+ ginScanKeyAddHiddenEntry(so, key, GIN_CAT_EMPTY_ITEM);
+ else if (searchMode == GIN_SEARCH_MODE_EVERYTHING)
+ ginScanKeyAddHiddenEntry(so, key, GIN_CAT_EMPTY_QUERY);
}
/*
@@ -265,6 +266,7 @@ ginNewScanKey(IndexScanDesc scan)
GinScanOpaque so = (GinScanOpaque) scan->opaque;
int i;
bool hasNullQuery = false;
+ bool attrHasNormalScan[INDEX_MAX_KEYS] = {false};
MemoryContext oldCtx;
/*
@@ -371,6 +373,33 @@ ginNewScanKey(IndexScanDesc scan)
skey->sk_argument, nQueryValues,
queryValues, categories,
partial_matches, extra_data);
+
+ /* Remember if we had any non-excludeOnly keys */
+ if (searchMode != GIN_SEARCH_MODE_ALL)
+ attrHasNormalScan[skey->sk_attno - 1] = true;
+ }
+
+ /*
+ * Processing GIN_SEARCH_MODE_ALL scan keys requires us to make a second
+ * pass over the scan keys. Above we marked each such scan key as
+ * excludeOnly. If the involved column has any normal (not excludeOnly)
+ * scan key as well, then we can leave it like that. Otherwise, one
+ * excludeOnly scan key must receive a GIN_CAT_EMPTY_QUERY hidden entry
+ * and be set to normal (excludeOnly = false).
+ */
+ for (i = 0; i < so->nkeys; i++)
+ {
+ GinScanKey key = &so->keys[i];
+
+ if (key->searchMode != GIN_SEARCH_MODE_ALL)
+ continue;
+
+ if (!attrHasNormalScan[key->attnum - 1])
+ {
+ key->excludeOnly = false;
+ ginScanKeyAddHiddenEntry(so, key, GIN_CAT_EMPTY_QUERY);
+ attrHasNormalScan[key->attnum - 1] = true;
+ }
}
/*