summaryrefslogtreecommitdiff
path: root/src/include/regex/regguts.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/regex/regguts.h')
-rw-r--r--src/include/regex/regguts.h141
1 files changed, 66 insertions, 75 deletions
diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h
index b0aa641cc4..69816f1449 100644
--- a/src/include/regex/regguts.h
+++ b/src/include/regex/regguts.h
@@ -127,23 +127,6 @@
#define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS)))
-
-/*
- * We dissect a chr into byts for colormap table indexing. Here we define
- * a byt, which will be the same as a byte on most machines... The exact
- * size of a byt is not critical, but about 8 bits is good, and extraction
- * of 8-bit chunks is sometimes especially fast.
- */
-#ifndef BYTBITS
-#define BYTBITS 8 /* bits in a byt */
-#endif
-#define BYTTAB (1<<BYTBITS) /* size of table with one entry per byt value */
-#define BYTMASK (BYTTAB-1) /* bit mask for byt */
-#define NBYTS ((CHRBITS+BYTBITS-1)/BYTBITS)
-/* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */
-
-
-
/*
* As soon as possible, we map chrs into equivalence classes -- "colors" --
* which are of much more manageable number.
@@ -153,43 +136,10 @@ typedef short color; /* colors of characters */
#define MAX_COLOR 32767 /* max color (must fit in 'color' datatype) */
#define COLORLESS (-1) /* impossible color */
#define WHITE 0 /* default color, parent of all others */
-
+/* Note: various places in the code know that WHITE is zero */
/*
- * A colormap is a tree -- more precisely, a DAG -- indexed at each level
- * by a byt of the chr, to map the chr to a color efficiently. Because
- * lower sections of the tree can be shared, it can exploit the usual
- * sparseness of such a mapping table. The tree is always NBYTS levels
- * deep (in the past it was shallower during construction but was "filled"
- * to full depth at the end of that); areas that are unaltered as yet point
- * to "fill blocks" which are entirely WHITE in color.
- *
- * Leaf-level tree blocks are of type "struct colors", while upper-level
- * blocks are of type "struct ptrs". Pointers into the tree are generally
- * declared as "union tree *" to be agnostic about what level they point to.
- */
-
-/* the tree itself */
-struct colors
-{
- color ccolor[BYTTAB];
-};
-struct ptrs
-{
- union tree *pptr[BYTTAB];
-};
-union tree
-{
- struct colors colors;
- struct ptrs ptrs;
-};
-
-/* use these pseudo-field names when dereferencing a "union tree" pointer */
-#define tcolor colors.ccolor
-#define tptr ptrs.pptr
-
-/*
* Per-color data structure for the compile-time color machinery
*
* If "sub" is not NOSUB then it is the number of the color's current
@@ -203,26 +153,56 @@ union tree
*/
struct colordesc
{
- uchr nchrs; /* number of chars of this color */
+ int nschrs; /* number of simple chars of this color */
+ int nuchrs; /* number of upper map entries of this color */
color sub; /* open subcolor, if any; or free-chain ptr */
#define NOSUB COLORLESS /* value of "sub" when no open subcolor */
struct arc *arcs; /* chain of all arcs of this color */
- chr firstchr; /* char first assigned to this color */
+ chr firstchr; /* simple char first assigned to this color */
int flags; /* bit values defined next */
#define FREECOL 01 /* currently free */
#define PSEUDO 02 /* pseudocolor, no real chars */
#define UNUSEDCOLOR(cd) ((cd)->flags & FREECOL)
- union tree *block; /* block of solid color, if any */
};
/*
* The color map itself
*
- * Much of the data in the colormap struct is only used at compile time.
- * However, the bulk of the space usage is in the "tree" structure, so it's
- * not clear that there's much point in converting the rest to a more compact
- * form when compilation is finished.
+ * This struct holds both data used only at compile time, and the chr to
+ * color mapping information, used at both compile and run time. The latter
+ * is the bulk of the space, so it's not really worth separating out the
+ * compile-only portion.
+ *
+ * Ideally, the mapping data would just be an array of colors indexed by
+ * chr codes; but for large character sets that's impractical. Fortunately,
+ * common characters have smaller codes, so we can use a simple array for chr
+ * codes up to MAX_SIMPLE_CHR, and do something more complex for codes above
+ * that, without much loss of performance. The "something more complex" is a
+ * 2-D array of color entries, where row indexes correspond to individual chrs
+ * or chr ranges that have been mentioned in the regex (with row zero
+ * representing all other chrs), and column indexes correspond to different
+ * sets of locale-dependent character classes such as "isalpha". The
+ * classbits[k] entry is zero if we do not care about the k'th character class
+ * in this regex, and otherwise it is the bit to be OR'd into the column index
+ * if the character in question is a member of that class. We find the color
+ * of a high-valued chr by identifying which colormaprange it is in to get
+ * the row index (use row zero if it's in none of them), identifying which of
+ * the interesting cclasses it's in to get the column index, and then indexing
+ * into the 2-D hicolormap array.
+ *
+ * The colormapranges are required to be nonempty, nonoverlapping, and to
+ * appear in increasing chr-value order.
*/
+
+#define NUM_CCLASSES 13 /* must match data in regc_locale.c */
+
+typedef struct colormaprange
+{
+ chr cmin; /* range represents cmin..cmax inclusive */
+ chr cmax;
+ int rownum; /* row index in hicolormap array (>= 1) */
+} colormaprange;
+
struct colormap
{
int magic;
@@ -233,27 +213,27 @@ struct colormap
color free; /* beginning of free chain (if non-0) */
struct colordesc *cd; /* pointer to array of colordescs */
#define CDEND(cm) (&(cm)->cd[(cm)->max + 1])
+
+ /* mapping data for chrs <= MAX_SIMPLE_CHR: */
+ color *locolormap; /* simple array indexed by chr code */
+
+ /* mapping data for chrs > MAX_SIMPLE_CHR: */
+ int classbits[NUM_CCLASSES]; /* see comment above */
+ int numcmranges; /* number of colormapranges */
+ colormaprange *cmranges; /* ranges of high chrs */
+ color *hicolormap; /* 2-D array of color entries */
+ int maxarrayrows; /* number of array rows allocated */
+ int hiarrayrows; /* number of array rows in use */
+ int hiarraycols; /* number of array columns (2^N) */
+
/* If we need up to NINLINECDS, we store them here to save a malloc */
-#define NINLINECDS ((size_t)10)
+#define NINLINECDS ((size_t) 10)
struct colordesc cdspace[NINLINECDS];
- union tree tree[NBYTS]; /* tree top, plus lower-level fill blocks */
};
-/* optimization magic to do fast chr->color mapping */
-#define B0(c) ((c) & BYTMASK)
-#define B1(c) (((c)>>BYTBITS) & BYTMASK)
-#define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK)
-#define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK)
-#if NBYTS == 1
-#define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)])
-#endif
-/* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */
-#if NBYTS == 2
-#define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)])
-#endif
-#if NBYTS == 4
-#define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)])
-#endif
+/* fetch color for chr; beware of multiple evaluation of c argument */
+#define GETCOLOR(cm, c) \
+ ((c) <= MAX_SIMPLE_CHR ? (cm)->locolormap[(c) - CHR_MIN] : pg_reg_getcolor(cm, c))
/*
@@ -264,6 +244,11 @@ struct colormap
* Representation of a set of characters. chrs[] represents individual
* code points, ranges[] represents ranges in the form min..max inclusive.
*
+ * If the cvec represents a locale-specific character class, eg [[:alpha:]],
+ * then the chrs[] and ranges[] arrays contain only members of that class
+ * up to MAX_SIMPLE_CHR (inclusive). cclasscode is set to regc_locale.c's
+ * code for the class, rather than being -1 as it is in an ordinary cvec.
+ *
* Note that in cvecs gotten from newcvec() and intended to be freed by
* freecvec(), both arrays of chrs are after the end of the struct, not
* separately malloc'd; so chrspace and rangespace are effectively immutable.
@@ -276,6 +261,7 @@ struct cvec
int nranges; /* number of ranges (chr pairs) */
int rangespace; /* number of ranges allocated in ranges[] */
chr *ranges; /* pointer to vector of chr pairs */
+ int cclasscode; /* value of "enum classes", or -1 */
};
@@ -489,3 +475,8 @@ struct guts
int nlacons; /* size of lacons[]; note that only slots
* numbered 1 .. nlacons-1 are used */
};
+
+
+/* prototypes for functions that are exported from regcomp.c to regexec.c */
+extern void pg_set_regex_collation(Oid collation);
+extern color pg_reg_getcolor(struct colormap * cm, chr c);