/*-------------------------------------------------------------------------
 *
 * regis.c
 *		Fast regex subset
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/tsearch/regis.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "tsearch/dicts/regis.h"
#include "tsearch/ts_locale.h"

/* Parser states shared by RS_isRegis and RS_compile */
#define RS_IN_ONEOF		1		/* just saw '[' */
#define RS_IN_ONEOF_IN	2		/* inside "[...]", at least one char seen */
#define RS_IN_NONEOF	3		/* inside "[^...]" */
#define RS_IN_WAIT		4		/* outside brackets, expecting next item */


/*
 * Test whether a regex is of the subset supported here.
 * Keep this in sync with RS_compile!
 *
 * The accepted syntax is a sequence of items, where each item is either a
 * single alphabetic character (per t_isalpha), a set "[abc]" holding one or
 * more alphabetic characters, or a negated set "[^abc]" holding zero or more
 * alphabetic characters.  An unterminated bracket is rejected, as is "[]".
 *
 * Returns true if str is entirely of that form (the empty string qualifies),
 * false otherwise.
 */
bool
RS_isRegis(const char *str)
{
	int			state = RS_IN_WAIT;
	const char *c = str;

	while (*c)
	{
		switch (state)
		{
			case RS_IN_WAIT:
				if (t_isalpha(c))
					 /* okay */ ;
				else if (t_iseq(c, '['))
					state = RS_IN_ONEOF;
				else
					return false;
				break;

			case RS_IN_ONEOF:
				if (t_iseq(c, '^'))
					state = RS_IN_NONEOF;
				else if (t_isalpha(c))
					state = RS_IN_ONEOF_IN;
				else
					return false;
				break;

			case RS_IN_ONEOF_IN:
			case RS_IN_NONEOF:
				if (t_isalpha(c))
					 /* okay */ ;
				else if (t_iseq(c, ']'))
					state = RS_IN_WAIT;
				else
					return false;
				break;

			default:
				elog(ERROR, "internal error in RS_isRegis: state %d", state);
		}

		/* advance by one (possibly multibyte) character */
		c += pg_mblen(c);
	}

	/* a pattern that ends inside a bracket expression is not valid */
	return (state == RS_IN_WAIT);
}

/*
 * Allocate a zero-filled RegisNode whose data area has room for len bytes
 * plus one more, and link it after prev when prev is not NULL.
 *
 * The extra byte stays zero, so the data can be scanned as a NUL-terminated
 * string (mb_strchr depends on that).
 */
static RegisNode *
newRegisNode(RegisNode *prev, int len)
{
	RegisNode  *ptr;

	ptr = (RegisNode *) palloc0(RNHDRSZ + len + 1);
	if (prev)
		prev->next = ptr;
	return ptr;
}

/*
 * Compile the pattern str into *r.
 *
 * *r is overwritten from scratch (it is zeroed first; nodes it may already
 * point to are not freed here).  One RegisNode is created per pattern item:
 * a bare character or "[...]" becomes an RSF_ONEOF node, "[^...]" becomes an
 * RSF_NONEOF node, and the node's data holds the listed characters.
 * r->nchar ends up as the number of nodes, i.e. the number of characters the
 * pattern matches.  issuffix is stored as 1 or 0 in r->issuffix.
 *
 * str is expected to have passed RS_isRegis; anything else raises
 * elog(ERROR).
 */
void
RS_compile(Regis *r, bool issuffix, const char *str)
{
	int			len = strlen(str);
	int			state = RS_IN_WAIT;
	const char *c = str;
	RegisNode  *ptr = NULL;

	memset(r, 0, sizeof(Regis));
	r->issuffix = (issuffix) ? 1 : 0;

	while (*c)
	{
		switch (state)
		{
			case RS_IN_WAIT:
				if (t_isalpha(c) || t_iseq(c, '['))
				{
					bool		isset = !t_isalpha(c);

					/*
					 * Every node's data area is sized by the byte length of
					 * the whole pattern, which no single item can exceed.
					 */
					ptr = newRegisNode(ptr, len);
					if (r->node == NULL)
						r->node = ptr;	/* first node becomes list head */

					ptr->type = RSF_ONEOF;
					if (isset)
						state = RS_IN_ONEOF;
					else
					{
						COPYCHAR(ptr->data, c);
						ptr->len = pg_mblen(c);
					}
				}
				else			/* shouldn't get here */
					elog(ERROR, "invalid regis pattern: \"%s\"", str);
				break;

			case RS_IN_ONEOF:
				if (t_iseq(c, '^'))
				{
					ptr->type = RSF_NONEOF;
					state = RS_IN_NONEOF;
				}
				else if (t_isalpha(c))
				{
					COPYCHAR(ptr->data, c);
					ptr->len = pg_mblen(c);
					state = RS_IN_ONEOF_IN;
				}
				else			/* shouldn't get here */
					elog(ERROR, "invalid regis pattern: \"%s\"", str);
				break;

			case RS_IN_ONEOF_IN:
			case RS_IN_NONEOF:
				if (t_isalpha(c))
				{
					/* append this character to the current set */
					COPYCHAR(ptr->data + ptr->len, c);
					ptr->len += pg_mblen(c);
				}
				else if (t_iseq(c, ']'))
					state = RS_IN_WAIT;
				else			/* shouldn't get here */
					elog(ERROR, "invalid regis pattern: \"%s\"", str);
				break;

			default:
				elog(ERROR, "internal error in RS_compile: state %d", state);
		}

		c += pg_mblen(c);
	}

	if (state != RS_IN_WAIT)	/* shouldn't get here */
		elog(ERROR, "invalid regis pattern: \"%s\"", str);

	/* nchar = number of nodes in the list */
	for (ptr = r->node; ptr != NULL; ptr = ptr->next)
		r->nchar++;
}

/*
 * Release every node attached to *r and clear r->node.
 *
 * The Regis struct itself is not freed, and its other fields are left as
 * they were.
 */
void
RS_free(Regis *r)
{
	RegisNode  *ptr = r->node;

	while (ptr)
	{
		RegisNode  *next = ptr->next;

		pfree(ptr);
		ptr = next;
	}

	r->node = NULL;
}

/*
 * Does the (possibly multibyte) character starting at c occur in the
 * NUL-terminated string str?
 *
 * Characters are compared by byte length and then bytewise.
 */
static bool
mb_strchr(const char *str, const char *c)
{
	const char *ptr = str;
	int			clen = pg_mblen(c);

	while (*ptr)
	{
		int			plen = pg_mblen(ptr);

		if (plen == clen && memcmp(ptr, c, clen) == 0)
			return true;
		ptr += plen;
	}

	return false;
}

/*
 * Match str against the compiled pattern *r.
 *
 * If str has fewer characters than the pattern has nodes, the result is
 * false.  Otherwise the pattern is applied to the first r->nchar characters
 * of str, or to the last r->nchar characters when r->issuffix is set; each
 * node must accept the character at its position.  Characters of str outside
 * that window are not examined.
 *
 * Returns true on a match, false otherwise.  An unknown node type raises
 * elog(ERROR).
 */
bool
RS_execute(Regis *r, char *str)
{
	RegisNode  *ptr = r->node;
	char	   *c = str;
	int			len = 0;

	/* count characters (not bytes) in str */
	while (*c)
	{
		len++;
		c += pg_mblen(c);
	}

	if (len < r->nchar)
		return false;

	c = str;
	if (r->issuffix)
	{
		/* skip leading characters so the pattern lines up with the tail */
		len -= r->nchar;
		while (len-- > 0)
			c += pg_mblen(c);
	}

	while (ptr)
	{
		switch (ptr->type)
		{
			case RSF_ONEOF:
				if (!mb_strchr((const char *) ptr->data, c))
					return false;
				break;
			case RSF_NONEOF:
				/* an empty set ("[^]") excludes nothing, so anything passes */
				if (mb_strchr((const char *) ptr->data, c))
					return false;
				break;
			default:
				elog(ERROR, "unrecognized regis node type: %d", ptr->type);
		}
		ptr = ptr->next;
		c += pg_mblen(c);
	}

	return true;
}