summaryrefslogtreecommitdiff
path: root/chromium/third_party/sqlite/src/ext/misc/regexp.c
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/third_party/sqlite/src/ext/misc/regexp.c')
-rw-r--r--chromium/third_party/sqlite/src/ext/misc/regexp.c134
1 files changed, 121 insertions, 13 deletions
diff --git a/chromium/third_party/sqlite/src/ext/misc/regexp.c b/chromium/third_party/sqlite/src/ext/misc/regexp.c
index 52973cc73fb..5da80dd9245 100644
--- a/chromium/third_party/sqlite/src/ext/misc/regexp.c
+++ b/chromium/third_party/sqlite/src/ext/misc/regexp.c
@@ -72,6 +72,7 @@ SQLITE_EXTENSION_INIT1
/* The end-of-input character */
#define RE_EOF 0 /* End of input */
+#define RE_START 0xfffffff /* Start of input - larger than an UTF-8 */
/* The NFA is implemented as sequence of opcodes taken from the following
** set. Each opcode has a single integer argument.
@@ -93,6 +94,33 @@ SQLITE_EXTENSION_INIT1
#define RE_OP_SPACE 15 /* space: [ \t\n\r\v\f] */
#define RE_OP_NOTSPACE 16 /* Not a digit */
#define RE_OP_BOUNDARY 17 /* Boundary between word and non-word */
+#define RE_OP_ATSTART 18 /* Currently at the start of the string */
+
+#if defined(SQLITE_DEBUG)
+/* Opcode names used for symbolic debugging */
+static const char *ReOpName[] = {
+ "EOF",
+ "MATCH",
+ "ANY",
+ "ANYSTAR",
+ "FORK",
+ "GOTO",
+ "ACCEPT",
+ "CC_INC",
+ "CC_EXC",
+ "CC_VALUE",
+ "CC_RANGE",
+ "WORD",
+ "NOTWORD",
+ "DIGIT",
+ "NOTDIGIT",
+ "SPACE",
+ "NOTSPACE",
+ "BOUNDARY",
+ "ATSTART",
+};
+#endif /* SQLITE_DEBUG */
+
/* Each opcode is a "state" in the NFA */
typedef unsigned short ReStateNumber;
@@ -127,7 +155,7 @@ struct ReCompiled {
int *aArg; /* Arguments to each operator */
unsigned (*xNextChar)(ReInput*); /* Next character function */
unsigned char zInit[12]; /* Initial text to match */
- int nInit; /* Number of characters in zInit */
+ int nInit; /* Number of bytes in zInit */
unsigned nState; /* Number of entries in aOp[] and aArg[] */
unsigned nAlloc; /* Slots allocated for aOp[] and aArg[] */
};
@@ -157,7 +185,7 @@ static unsigned re_next_char(ReInput *p){
c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f);
p->i += 2;
if( c<=0x7ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd;
- }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80
+ }else if( (c&0xf8)==0xf0 && p->i+2<p->mx && (p->z[p->i]&0xc0)==0x80
&& (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){
c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6)
| (p->z[p->i+2]&0x3f);
@@ -200,7 +228,7 @@ static int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){
ReStateNumber *pToFree;
unsigned int i = 0;
unsigned int iSwap = 0;
- int c = RE_EOF+1;
+ int c = RE_START;
int cPrev = 0;
int rc = 0;
ReInput in;
@@ -219,6 +247,7 @@ static int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){
in.i++;
}
if( in.i+pRe->nInit>in.mx ) return 0;
+ c = RE_START-1;
}
if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){
@@ -247,6 +276,10 @@ static int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){
if( pRe->aArg[x]==c ) re_add_state(pNext, x+1);
break;
}
+ case RE_OP_ATSTART: {
+ if( cPrev==RE_START ) re_add_state(pThis, x+1);
+ break;
+ }
case RE_OP_ANY: {
if( c!=0 ) re_add_state(pNext, x+1);
break;
@@ -328,7 +361,9 @@ static int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){
}
}
for(i=0; i<pNext->nState; i++){
- if( pRe->aOp[pNext->aState[i]]==RE_OP_ACCEPT ){ rc = 1; break; }
+ int x = pNext->aState[i];
+ while( pRe->aOp[x]==RE_OP_GOTO ) x += pRe->aArg[x];
+ if( pRe->aOp[x]==RE_OP_ACCEPT ){ rc = 1; break; }
}
re_match_end:
sqlite3_free(pToFree);
@@ -483,7 +518,6 @@ static const char *re_subcompile_string(ReCompiled *p){
iStart = p->nState;
switch( c ){
case '|':
- case '$':
case ')': {
p->sIn.i--;
return 0;
@@ -520,6 +554,14 @@ static const char *re_subcompile_string(ReCompiled *p){
re_insert(p, iPrev, RE_OP_FORK, p->nState - iPrev+1);
break;
}
+ case '$': {
+ re_append(p, RE_OP_MATCH, RE_EOF);
+ break;
+ }
+ case '^': {
+ re_append(p, RE_OP_ATSTART, 0);
+ break;
+ }
case '{': {
int m = 0, n = 0;
int sz, j;
@@ -538,6 +580,7 @@ static const char *re_subcompile_string(ReCompiled *p){
if( m==0 ){
if( n==0 ) return "both m and n are zero in '{m,n}'";
re_insert(p, iPrev, RE_OP_FORK, sz+1);
+ iPrev++;
n--;
}else{
for(j=1; j<m; j++) re_copy(p, iPrev, sz);
@@ -656,11 +699,7 @@ static const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){
re_free(pRe);
return zErr;
}
- if( rePeek(pRe)=='$' && pRe->sIn.i+1>=pRe->sIn.mx ){
- re_append(pRe, RE_OP_MATCH, RE_EOF);
- re_append(pRe, RE_OP_ACCEPT, 0);
- *ppRe = pRe;
- }else if( pRe->sIn.i>=pRe->sIn.mx ){
+ if( pRe->sIn.i>=pRe->sIn.mx ){
re_append(pRe, RE_OP_ACCEPT, 0);
*ppRe = pRe;
}else{
@@ -673,15 +712,15 @@ static const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){
** one or more matching characters, enter those matching characters into
** zInit[]. The re_match() routine can then search ahead in the input
** string looking for the initial match without having to run the whole
- ** regex engine over the string. Do not worry able trying to match
+ ** regex engine over the string. Do not worry about trying to match
** unicode characters beyond plane 0 - those are very rare and this is
** just an optimization. */
if( pRe->aOp[0]==RE_OP_ANYSTAR && !noCase ){
for(j=0, i=1; j<(int)sizeof(pRe->zInit)-2 && pRe->aOp[i]==RE_OP_MATCH; i++){
unsigned x = pRe->aArg[i];
- if( x<=127 ){
+ if( x<=0x7f ){
pRe->zInit[j++] = (unsigned char)x;
- }else if( x<=0xfff ){
+ }else if( x<=0x7ff ){
pRe->zInit[j++] = (unsigned char)(0xc0 | (x>>6));
pRe->zInit[j++] = 0x80 | (x&0x3f);
}else if( x<=0xffff ){
@@ -744,6 +783,68 @@ static void re_sql_func(
}
}
+#if defined(SQLITE_DEBUG)
+/*
+** This function is used for testing and debugging only. It is only available
+** if the SQLITE_DEBUG compile-time option is used.
+**
+** Compile a regular expression and then convert the compiled expression into
+** text and return that text.
+*/
+static void re_bytecode_func(
+ sqlite3_context *context,
+ int argc,
+ sqlite3_value **argv
+){
+ const char *zPattern;
+ const char *zErr;
+ ReCompiled *pRe;
+ sqlite3_str *pStr;
+ int i;
+ int n;
+ char *z;
+ (void)argc;
+
+ zPattern = (const char*)sqlite3_value_text(argv[0]);
+ if( zPattern==0 ) return;
+ zErr = re_compile(&pRe, zPattern, sqlite3_user_data(context)!=0);
+ if( zErr ){
+ re_free(pRe);
+ sqlite3_result_error(context, zErr, -1);
+ return;
+ }
+ if( pRe==0 ){
+ sqlite3_result_error_nomem(context);
+ return;
+ }
+ pStr = sqlite3_str_new(0);
+ if( pStr==0 ) goto re_bytecode_func_err;
+ if( pRe->nInit>0 ){
+ sqlite3_str_appendf(pStr, "INIT ");
+ for(i=0; i<pRe->nInit; i++){
+ sqlite3_str_appendf(pStr, "%02x", pRe->zInit[i]);
+ }
+ sqlite3_str_appendf(pStr, "\n");
+ }
+ for(i=0; (unsigned)i<pRe->nState; i++){
+ sqlite3_str_appendf(pStr, "%-8s %4d\n",
+ ReOpName[(unsigned char)pRe->aOp[i]], pRe->aArg[i]);
+ }
+ n = sqlite3_str_length(pStr);
+ z = sqlite3_str_finish(pStr);
+ if( n==0 ){
+ sqlite3_free(z);
+ }else{
+ sqlite3_result_text(context, z, n-1, sqlite3_free);
+ }
+
+re_bytecode_func_err:
+ re_free(pRe);
+}
+
+#endif /* SQLITE_DEBUG */
+
+
/*
** Invoke this routine to register the regexp() function with the
** SQLite database connection.
@@ -768,6 +869,13 @@ int sqlite3_regexp_init(
rc = sqlite3_create_function(db, "regexpi", 2,
SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC,
(void*)db, re_sql_func, 0, 0);
+#if defined(SQLITE_DEBUG)
+ if( rc==SQLITE_OK ){
+ rc = sqlite3_create_function(db, "regexp_bytecode", 1,
+ SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC,
+ 0, re_bytecode_func, 0, 0);
+ }
+#endif /* SQLITE_DEBUG */
}
return rc;
}