diff options
Diffstat (limited to 'chromium/third_party/sqlite/src/ext/misc/regexp.c')
-rw-r--r-- | chromium/third_party/sqlite/src/ext/misc/regexp.c | 134 |
1 files changed, 121 insertions, 13 deletions
diff --git a/chromium/third_party/sqlite/src/ext/misc/regexp.c b/chromium/third_party/sqlite/src/ext/misc/regexp.c index 52973cc73fb..5da80dd9245 100644 --- a/chromium/third_party/sqlite/src/ext/misc/regexp.c +++ b/chromium/third_party/sqlite/src/ext/misc/regexp.c @@ -72,6 +72,7 @@ SQLITE_EXTENSION_INIT1 /* The end-of-input character */ #define RE_EOF 0 /* End of input */ +#define RE_START 0xfffffff /* Start of input - larger than an UTF-8 */ /* The NFA is implemented as sequence of opcodes taken from the following ** set. Each opcode has a single integer argument. @@ -93,6 +94,33 @@ SQLITE_EXTENSION_INIT1 #define RE_OP_SPACE 15 /* space: [ \t\n\r\v\f] */ #define RE_OP_NOTSPACE 16 /* Not a digit */ #define RE_OP_BOUNDARY 17 /* Boundary between word and non-word */ +#define RE_OP_ATSTART 18 /* Currently at the start of the string */ + +#if defined(SQLITE_DEBUG) +/* Opcode names used for symbolic debugging */ +static const char *ReOpName[] = { + "EOF", + "MATCH", + "ANY", + "ANYSTAR", + "FORK", + "GOTO", + "ACCEPT", + "CC_INC", + "CC_EXC", + "CC_VALUE", + "CC_RANGE", + "WORD", + "NOTWORD", + "DIGIT", + "NOTDIGIT", + "SPACE", + "NOTSPACE", + "BOUNDARY", + "ATSTART", +}; +#endif /* SQLITE_DEBUG */ + /* Each opcode is a "state" in the NFA */ typedef unsigned short ReStateNumber; @@ -127,7 +155,7 @@ struct ReCompiled { int *aArg; /* Arguments to each operator */ unsigned (*xNextChar)(ReInput*); /* Next character function */ unsigned char zInit[12]; /* Initial text to match */ - int nInit; /* Number of characters in zInit */ + int nInit; /* Number of bytes in zInit */ unsigned nState; /* Number of entries in aOp[] and aArg[] */ unsigned nAlloc; /* Slots allocated for aOp[] and aArg[] */ }; @@ -157,7 +185,7 @@ static unsigned re_next_char(ReInput *p){ c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f); p->i += 2; if( c<=0x7ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd; - }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80 + }else if( (c&0xf8)==0xf0 && p->i+2<p->mx && (p->z[p->i]&0xc0)==0x80 && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){ c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6) | (p->z[p->i+2]&0x3f); @@ -200,7 +228,7 @@ static int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){ ReStateNumber *pToFree; unsigned int i = 0; unsigned int iSwap = 0; - int c = RE_EOF+1; + int c = RE_START; int cPrev = 0; int rc = 0; ReInput in; @@ -219,6 +247,7 @@ static int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){ in.i++; } if( in.i+pRe->nInit>in.mx ) return 0; + c = RE_START-1; } if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){ @@ -247,6 +276,10 @@ static int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){ if( pRe->aArg[x]==c ) re_add_state(pNext, x+1); break; } + case RE_OP_ATSTART: { + if( cPrev==RE_START ) re_add_state(pThis, x+1); + break; + } case RE_OP_ANY: { if( c!=0 ) re_add_state(pNext, x+1); break; @@ -328,7 +361,9 @@ static int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){ } } for(i=0; i<pNext->nState; i++){ - if( pRe->aOp[pNext->aState[i]]==RE_OP_ACCEPT ){ rc = 1; break; } + int x = pNext->aState[i]; + while( pRe->aOp[x]==RE_OP_GOTO ) x += pRe->aArg[x]; + if( pRe->aOp[x]==RE_OP_ACCEPT ){ rc = 1; break; } } re_match_end: sqlite3_free(pToFree); @@ -483,7 +518,6 @@ static const char *re_subcompile_string(ReCompiled *p){ iStart = p->nState; switch( c ){ case '|': - case '$': case ')': { p->sIn.i--; return 0; @@ -520,6 +554,14 @@ static const char *re_subcompile_string(ReCompiled *p){ re_insert(p, iPrev, RE_OP_FORK, p->nState - iPrev+1); break; } + case '$': { + re_append(p, RE_OP_MATCH, RE_EOF); + break; + } + case '^': { + re_append(p, RE_OP_ATSTART, 0); + break; + } case '{': { int m = 0, n = 0; int sz, j; @@ -538,6 +580,7 @@ static const char *re_subcompile_string(ReCompiled *p){ if( m==0 ){ if( n==0 ) return "both m and n are zero in '{m,n}'"; re_insert(p, iPrev, RE_OP_FORK, sz+1); + iPrev++; n--; }else{ for(j=1; j<m; j++) re_copy(p, iPrev, sz); @@ -656,11 +699,7 @@ static const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){ re_free(pRe); return zErr; } - if( rePeek(pRe)=='$' && pRe->sIn.i+1>=pRe->sIn.mx ){ - re_append(pRe, RE_OP_MATCH, RE_EOF); - re_append(pRe, RE_OP_ACCEPT, 0); - *ppRe = pRe; - }else if( pRe->sIn.i>=pRe->sIn.mx ){ + if( pRe->sIn.i>=pRe->sIn.mx ){ re_append(pRe, RE_OP_ACCEPT, 0); *ppRe = pRe; }else{ @@ -673,15 +712,15 @@ static const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){ ** one or more matching characters, enter those matching characters into ** zInit[]. The re_match() routine can then search ahead in the input ** string looking for the initial match without having to run the whole - ** regex engine over the string. Do not worry able trying to match + ** regex engine over the string. Do not worry about trying to match ** unicode characters beyond plane 0 - those are very rare and this is ** just an optimization. */ if( pRe->aOp[0]==RE_OP_ANYSTAR && !noCase ){ for(j=0, i=1; j<(int)sizeof(pRe->zInit)-2 && pRe->aOp[i]==RE_OP_MATCH; i++){ unsigned x = pRe->aArg[i]; - if( x<=127 ){ + if( x<=0x7f ){ pRe->zInit[j++] = (unsigned char)x; - }else if( x<=0xfff ){ + }else if( x<=0x7ff ){ pRe->zInit[j++] = (unsigned char)(0xc0 | (x>>6)); pRe->zInit[j++] = 0x80 | (x&0x3f); }else if( x<=0xffff ){ @@ -744,6 +783,68 @@ static void re_sql_func( } } +#if defined(SQLITE_DEBUG) +/* +** This function is used for testing and debugging only. It is only available +** if the SQLITE_DEBUG compile-time option is used. +** +** Compile a regular expression and then convert the compiled expression into +** text and return that text. +*/ +static void re_bytecode_func( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const char *zPattern; + const char *zErr; + ReCompiled *pRe; + sqlite3_str *pStr; + int i; + int n; + char *z; + (void)argc; + + zPattern = (const char*)sqlite3_value_text(argv[0]); + if( zPattern==0 ) return; + zErr = re_compile(&pRe, zPattern, sqlite3_user_data(context)!=0); + if( zErr ){ + re_free(pRe); + sqlite3_result_error(context, zErr, -1); + return; + } + if( pRe==0 ){ + sqlite3_result_error_nomem(context); + return; + } + pStr = sqlite3_str_new(0); + if( pStr==0 ) goto re_bytecode_func_err; + if( pRe->nInit>0 ){ + sqlite3_str_appendf(pStr, "INIT "); + for(i=0; i<pRe->nInit; i++){ + sqlite3_str_appendf(pStr, "%02x", pRe->zInit[i]); + } + sqlite3_str_appendf(pStr, "\n"); + } + for(i=0; (unsigned)i<pRe->nState; i++){ + sqlite3_str_appendf(pStr, "%-8s %4d\n", + ReOpName[(unsigned char)pRe->aOp[i]], pRe->aArg[i]); + } + n = sqlite3_str_length(pStr); + z = sqlite3_str_finish(pStr); + if( n==0 ){ + sqlite3_free(z); + }else{ + sqlite3_result_text(context, z, n-1, sqlite3_free); + } + +re_bytecode_func_err: + re_free(pRe); +} + +#endif /* SQLITE_DEBUG */ + + /* ** Invoke this routine to register the regexp() function with the ** SQLite database connection. @@ -768,6 +869,13 @@ int sqlite3_regexp_init( rc = sqlite3_create_function(db, "regexpi", 2, SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC, (void*)db, re_sql_func, 0, 0); +#if defined(SQLITE_DEBUG) + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function(db, "regexp_bytecode", 1, + SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC, + 0, re_bytecode_func, 0, 0); + } +#endif /* SQLITE_DEBUG */ } return rc; } |