diff options
author | unknown <bar@gw.udmsearch.izhnet.ru> | 2002-03-06 20:04:13 +0400 |
---|---|---|
committer | unknown <bar@gw.udmsearch.izhnet.ru> | 2002-03-06 20:04:13 +0400 |
commit | 654db69b8247a7e6be4e039a558f635af9abae57 (patch) | |
tree | c6287ade2a44504c39beaf6f8b3da5437184fae4 | |
parent | 325c22a784e54c0c26271d55896b60538c211724 (diff) | |
download | mariadb-git-654db69b8247a7e6be4e039a558f635af9abae57.tar.gz |
Regex library is switched to use new ctype tools
to allow usage of many character sets at a time.
include/m_ctype.h:
Added condition to simplify migrating from old ctype
Added new style toupper, tolower which accepts charset in first argument
regex/debug.c:
Added charset argument
regex/debug.ih:
added charset argument
regex/engine.c:
added charset argument
regex/engine.ih:
added charset argument
regex/main.c:
added charset argument
regex/regcomp.c:
added CHARSET_INFO field
regex/regcomp.ih:
Added charset argument
regex/regex.h:
Added #include <m_ctype.h> for CHARSET_INFO
Added charset argument for regcomp()
regex/regex2.h:
New charset argument for ISWORD()
regex/regexec.c:
New charset argument
regex/reginit.c:
Move to new style ctype.
However still needs fixes:
instead of single static cclass variable,
each charset must have its own variable.
sql/item_cmpfunc.cc:
Pass charset field into regcomp()
This will be fixed tomorrow to use String->charset
instead of default_charset_info
-rw-r--r-- | include/m_ctype.h | 5 | ||||
-rw-r--r-- | regex/debug.c | 22 | ||||
-rw-r--r-- | regex/debug.ih | 4 | ||||
-rw-r--r-- | regex/engine.c | 97 | ||||
-rw-r--r-- | regex/engine.ih | 10 | ||||
-rw-r--r-- | regex/main.c | 4 | ||||
-rw-r--r-- | regex/regcomp.c | 42 | ||||
-rw-r--r-- | regex/regcomp.ih | 2 | ||||
-rw-r--r-- | regex/regex.h | 5 | ||||
-rw-r--r-- | regex/regex2.h | 2 | ||||
-rw-r--r-- | regex/regexec.c | 4 | ||||
-rw-r--r-- | regex/reginit.c | 23 | ||||
-rw-r--r-- | sql/item_cmpfunc.cc | 6 |
13 files changed, 126 insertions, 100 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h index daeffdc2fee..998841c14d4 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -180,6 +180,7 @@ extern const char *compiled_charset_name(uint charset_number); #define _B 0100 /* Blank */ #define _X 0200 /* heXadecimal digit */ +#ifndef HIDE_OLD_CTYPE #define my_ctype (default_charset_info->ctype) #define my_to_upper (default_charset_info->to_upper) #define my_to_lower (default_charset_info->to_lower) @@ -201,6 +202,8 @@ extern const char *compiled_charset_name(uint charset_number); #define isprint(c) ((my_ctype+1)[(uchar) (c)] & (_P | _U | _L | _N | _B)) #define isgraph(c) ((my_ctype+1)[(uchar) (c)] & (_P | _U | _L | _N)) #define iscntrl(c) ((my_ctype+1)[(uchar) (c)] & _C) +#endif + #define isascii(c) (!((c) & ~0177)) #define toascii(c) ((c) & 0177) @@ -208,6 +211,8 @@ extern const char *compiled_charset_name(uint charset_number); #undef ctype #endif /* ctype */ +#define my_toupper(s,c) (char) ((s)->to_upper[(uchar) (c)]) +#define my_tolower(s,c) (char) ((s)->to_lower[(uchar) (c)]) #define my_isalpha(s, c) (((s)->ctype+1)[(uchar) (c)] & (_U | _L)) #define my_isupper(s, c) (((s)->ctype+1)[(uchar) (c)] & _U) #define my_islower(s, c) (((s)->ctype+1)[(uchar) (c)] & _L) diff --git a/regex/debug.c b/regex/debug.c index 35279941d48..bdd3e00d5a7 100644 --- a/regex/debug.c +++ b/regex/debug.c @@ -45,7 +45,7 @@ FILE *d; if (g->nplus > 0) fprintf(d, ", nplus %ld", (long)g->nplus); fprintf(d, "\n"); - s_print(g, d); + s_print(r->charset, g, d); for (i = 0; i < g->ncategories; i++) { nincat[i] = 0; for (c = CHAR_MIN; c <= CHAR_MAX; c++) @@ -58,7 +58,7 @@ FILE *d; for (c = CHAR_MIN; c <= CHAR_MAX; c++) if (g->categories[c] == i) break; - fprintf(d, ", %d=%s", i, regchar(c,buf)); + fprintf(d, ", %d=%s", i, regchar(r->charset,c,buf)); } fprintf(d, "\n"); for (i = 1; i < g->ncategories; i++) @@ -68,14 +68,14 @@ FILE *d; for (c = CHAR_MIN; c <= CHAR_MAX+1; c++) /* +1 does flush */ if (c <= CHAR_MAX && g->categories[c] == 
i) { if (last < 0) { - fprintf(d, "%s", regchar(c,buf)); + fprintf(d, "%s", regchar(r->charset,c,buf)); last = c; } } else { if (last >= 0) { if (last != c-1) fprintf(d, "-%s", - regchar(c-1,buf)); + regchar(r->charset,c-1,buf)); last = -1; } } @@ -88,7 +88,8 @@ FILE *d; == static void s_print(register struct re_guts *g, FILE *d); */ static void -s_print(g, d) +s_print(charset, g, d) +CHARSET_INFO *charset; register struct re_guts *g; FILE *d; { @@ -127,7 +128,7 @@ FILE *d; if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL) fprintf(d, "\\%c", (char)opnd); else - fprintf(d, "%s", regchar((char)opnd,buf)); + fprintf(d, "%s", regchar(charset,(char)opnd,buf)); break; case OBOL: fprintf(d, "^"); @@ -151,14 +152,14 @@ FILE *d; for (i = 0; i < g->csetsize+1; i++) /* +1 flushes */ if (CHIN(cs, i) && i < g->csetsize) { if (last < 0) { - fprintf(d, "%s", regchar(i,buf)); + fprintf(d, "%s", regchar(charset,i,buf)); last = i; } } else { if (last >= 0) { if (last != i-1) fprintf(d, "-%s", - regchar(i-1,buf)); + regchar(charset,i-1,buf)); last = -1; } } @@ -230,12 +231,13 @@ FILE *d; == static char *regchar(int ch); */ static char * /* -> representation */ -regchar(ch,buf) +regchar(charset,ch,buf) +CHARSET_INFO *charset; int ch; char *buf; { - if (isprint(ch) || ch == ' ') + if (my_isprint(charset,ch) || ch == ' ') sprintf(buf, "%c", ch); else sprintf(buf, "\\%o", ch); diff --git a/regex/debug.ih b/regex/debug.ih index 532ec5086e5..0d91e170437 100644 --- a/regex/debug.ih +++ b/regex/debug.ih @@ -5,8 +5,8 @@ extern "C" { /* === debug.c === */ void regprint(regex_t *r, FILE *d); -static void s_print(register struct re_guts *g, FILE *d); -static char *regchar(int ch,char *buf); +static void s_print(CHARSET_INFO *charset, register struct re_guts *g, FILE *d); +static char *regchar(CHARSET_INFO *charset, int ch,char *buf); #ifdef __cplusplus } diff --git a/regex/engine.c b/regex/engine.c index e77fbd7cf89..6734560b9bf 100644 --- a/regex/engine.c +++ b/regex/engine.c @@ -63,7 +63,8 
@@ struct match { == size_t nmatch, regmatch_t pmatch[], int eflags); */ static int /* 0 success, REG_NOMATCH failure */ -matcher(g, str, nmatch, pmatch, eflags) +matcher(charset,g, str, nmatch, pmatch, eflags) +CHARSET_INFO *charset; register struct re_guts *g; char *str; size_t nmatch; @@ -120,7 +121,7 @@ int eflags; /* this loop does only one repetition except for backrefs */ for (;;) { - endp = fast(m, start, stop, gf, gl); + endp = fast(charset, m, start, stop, gf, gl); if (endp == NULL) { /* a miss */ if (m->pmatch != NULL) free((char *)m->pmatch); @@ -136,7 +137,7 @@ int eflags; assert(m->coldp != NULL); for (;;) { NOTE("finding start"); - endp = slow(m, m->coldp, stop, gf, gl); + endp = slow(charset, m, m->coldp, stop, gf, gl); if (endp != NULL) break; assert(m->coldp < m->endp); @@ -159,7 +160,7 @@ int eflags; m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; if (!g->backrefs && !(m->eflags®_BACKR)) { NOTE("dissecting"); - dp = dissect(m, m->coldp, endp, gf, gl); + dp = dissect(charset, m, m->coldp, endp, gf, gl); } else { if (g->nplus > 0 && m->lastpos == NULL) m->lastpos = (char **)malloc((g->nplus+1) * @@ -170,7 +171,7 @@ int eflags; return(REG_ESPACE); } NOTE("backref dissect"); - dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); + dp = backref(charset, m, m->coldp, endp, gf, gl, (sopno)0); } if (dp != NULL) break; @@ -182,7 +183,7 @@ int eflags; if (dp != NULL || endp <= m->coldp) break; /* defeat */ NOTE("backoff"); - endp = slow(m, m->coldp, endp-1, gf, gl); + endp = slow(charset, m, m->coldp, endp-1, gf, gl); if (endp == NULL) break; /* defeat */ /* try it on a shorter possibility */ @@ -193,7 +194,7 @@ int eflags; } #endif NOTE("backoff dissect"); - dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); + dp = backref(charset, m, m->coldp, endp, gf, gl, (sopno)0); } assert(dp == NULL || dp == endp); if (dp != NULL) /* found a shorter one */ @@ -235,7 +236,8 @@ int eflags; == char *stop, sopno startst, sopno stopst); */ static char * /* == stop (success) 
always */ -dissect(m, start, stop, startst, stopst) +dissect(charset, m, start, stop, startst, stopst) +CHARSET_INFO *charset; register struct match *m; char *start; char *stop; @@ -299,10 +301,10 @@ sopno stopst; stp = stop; for (;;) { /* how long could this one be? */ - rest = slow(m, sp, stp, ss, es); + rest = slow(charset, m, sp, stp, ss, es); assert(rest != NULL); /* it did match */ /* could the rest match the rest? */ - tail = slow(m, rest, stop, es, stopst); + tail = slow(charset, m, rest, stop, es, stopst); if (tail == stop) break; /* yes! */ /* no -- try a shorter match for this one */ @@ -312,8 +314,8 @@ sopno stopst; ssub = ss + 1; esub = es - 1; /* did innards match? */ - if (slow(m, sp, rest, ssub, esub) != NULL) { - dp = dissect(m, sp, rest, ssub, esub); + if (slow(charset, m, sp, rest, ssub, esub) != NULL) { + dp = dissect(charset, m, sp, rest, ssub, esub); assert(dp == rest); } else /* no */ assert(sp == rest); @@ -323,10 +325,10 @@ sopno stopst; stp = stop; for (;;) { /* how long could this one be? */ - rest = slow(m, sp, stp, ss, es); + rest = slow(charset, m, sp, stp, ss, es); assert(rest != NULL); /* it did match */ /* could the rest match the rest? */ - tail = slow(m, rest, stop, es, stopst); + tail = slow(charset, m, rest, stop, es, stopst); if (tail == stop) break; /* yes! 
*/ /* no -- try a shorter match for this one */ @@ -338,7 +340,7 @@ sopno stopst; ssp = sp; oldssp = ssp; for (;;) { /* find last match of innards */ - sep = slow(m, ssp, rest, ssub, esub); + sep = slow(charset, m, ssp, rest, ssub, esub); if (sep == NULL || sep == ssp) break; /* failed or matched null */ oldssp = ssp; /* on to next try */ @@ -350,8 +352,8 @@ sopno stopst; ssp = oldssp; } assert(sep == rest); /* must exhaust substring */ - assert(slow(m, ssp, sep, ssub, esub) == rest); - dp = dissect(m, ssp, sep, ssub, esub); + assert(slow(charset, m, ssp, sep, ssub, esub) == rest); + dp = dissect(charset, m, ssp, sep, ssub, esub); assert(dp == sep); sp = rest; break; @@ -359,10 +361,10 @@ sopno stopst; stp = stop; for (;;) { /* how long could this one be? */ - rest = slow(m, sp, stp, ss, es); + rest = slow(charset, m, sp, stp, ss, es); assert(rest != NULL); /* it did match */ /* could the rest match the rest? */ - tail = slow(m, rest, stop, es, stopst); + tail = slow(charset, m, rest, stop, es, stopst); if (tail == stop) break; /* yes! 
*/ /* no -- try a shorter match for this one */ @@ -373,7 +375,7 @@ sopno stopst; esub = ss + OPND(m->g->strip[ss]) - 1; assert(OP(m->g->strip[esub]) == OOR1); for (;;) { /* find first matching branch */ - if (slow(m, sp, rest, ssub, esub) == rest) + if (slow(charset, m, sp, rest, ssub, esub) == rest) break; /* it matched all of it */ /* that one missed, try next one */ assert(OP(m->g->strip[esub]) == OOR1); @@ -386,7 +388,7 @@ sopno stopst; else assert(OP(m->g->strip[esub]) == O_CH); } - dp = dissect(m, sp, rest, ssub, esub); + dp = dissect(charset, m, sp, rest, ssub, esub); assert(dp == rest); sp = rest; break; @@ -423,7 +425,8 @@ sopno stopst; == char *stop, sopno startst, sopno stopst, sopno lev); */ static char * /* == stop (success) or NULL (failure) */ -backref(m, start, stop, startst, stopst, lev) +backref(charset,m, start, stop, startst, stopst, lev) +CHARSET_INFO *charset; register struct match *m; char *start; char *stop; @@ -486,8 +489,8 @@ sopno lev; /* PLUS nesting level */ (sp < m->endp && *(sp-1) == '\n' && (m->g->cflags®_NEWLINE)) || (sp > m->beginp && - !ISWORD(*(sp-1))) ) && - (sp < m->endp && ISWORD(*sp)) ) + !ISWORD(charset,*(sp-1))) ) && + (sp < m->endp && ISWORD(charset,*sp)) ) { /* yes */ } else return(NULL); @@ -496,8 +499,8 @@ sopno lev; /* PLUS nesting level */ if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || (sp < m->endp && *sp == '\n' && (m->g->cflags®_NEWLINE)) || - (sp < m->endp && !ISWORD(*sp)) ) && - (sp > m->beginp && ISWORD(*(sp-1))) ) + (sp < m->endp && !ISWORD(charset,*sp)) ) && + (sp > m->beginp && ISWORD(charset,*(sp-1))) ) { /* yes */ } else return(NULL); @@ -543,28 +546,28 @@ sopno lev; /* PLUS nesting level */ return(NULL); while (m->g->strip[ss] != SOP(O_BACK, i)) ss++; - return(backref(m, sp+len, stop, ss+1, stopst, lev)); + return(backref(charset, m, sp+len, stop, ss+1, stopst, lev)); break; case OQUEST_: /* to null or not */ - dp = backref(m, sp, stop, ss+1, stopst, lev); + dp = backref(charset, m, sp, stop, ss+1, 
stopst, lev); if (dp != NULL) return(dp); /* not */ - return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); + return(backref(charset, m, sp, stop, ss+OPND(s)+1, stopst, lev)); break; case OPLUS_: assert(m->lastpos != NULL); assert(lev+1 <= m->g->nplus); m->lastpos[lev+1] = sp; - return(backref(m, sp, stop, ss+1, stopst, lev+1)); + return(backref(charset, m, sp, stop, ss+1, stopst, lev+1)); break; case O_PLUS: if (sp == m->lastpos[lev]) /* last pass matched null */ - return(backref(m, sp, stop, ss+1, stopst, lev-1)); + return(backref(charset, m, sp, stop, ss+1, stopst, lev-1)); /* try another pass */ m->lastpos[lev] = sp; - dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); + dp = backref(charset, m, sp, stop, ss-OPND(s)+1, stopst, lev); if (dp == NULL) - return(backref(m, sp, stop, ss+1, stopst, lev-1)); + return(backref(charset, m, sp, stop, ss+1, stopst, lev-1)); else return(dp); break; @@ -573,7 +576,7 @@ sopno lev; /* PLUS nesting level */ esub = ss + OPND(s) - 1; assert(OP(m->g->strip[esub]) == OOR1); for (;;) { /* find first matching branch */ - dp = backref(m, sp, stop, ssub, esub, lev); + dp = backref(charset, m, sp, stop, ssub, esub, lev); if (dp != NULL) return(dp); /* that one missed, try next one */ @@ -594,7 +597,7 @@ sopno lev; /* PLUS nesting level */ assert(0 < i && i <= m->g->nsub); offsave = m->pmatch[i].rm_so; m->pmatch[i].rm_so = sp - m->offp; - dp = backref(m, sp, stop, ss+1, stopst, lev); + dp = backref(charset, m, sp, stop, ss+1, stopst, lev); if (dp != NULL) return(dp); m->pmatch[i].rm_so = offsave; @@ -605,7 +608,7 @@ sopno lev; /* PLUS nesting level */ assert(0 < i && i <= m->g->nsub); offsave = m->pmatch[i].rm_eo; m->pmatch[i].rm_eo = sp - m->offp; - dp = backref(m, sp, stop, ss+1, stopst, lev); + dp = backref(charset, m, sp, stop, ss+1, stopst, lev); if (dp != NULL) return(dp); m->pmatch[i].rm_eo = offsave; @@ -628,7 +631,8 @@ sopno lev; /* PLUS nesting level */ == char *stop, sopno startst, sopno stopst); */ static char * /* where 
tentative match ended, or NULL */ -fast(m, start, stop, startst, stopst) +fast(charset, m, start, stop, startst, stopst) +CHARSET_INFO *charset; register struct match *m; char *start; char *stop; @@ -678,12 +682,12 @@ sopno stopst; } /* how about a word boundary? */ - if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && - (c != OUT && ISWORD(c)) ) { + if ( (flagch == BOL || (lastc != OUT && !ISWORD(charset,lastc))) && + (c != OUT && ISWORD(charset,c)) ) { flagch = BOW; } - if ( (lastc != OUT && ISWORD(lastc)) && - (flagch == EOL || (c != OUT && !ISWORD(c))) ) { + if ( (lastc != OUT && ISWORD(charset,lastc)) && + (flagch == EOL || (c != OUT && !ISWORD(charset,c))) ) { flagch = EOW; } if (flagch == BOW || flagch == EOW) { @@ -719,7 +723,8 @@ sopno stopst; == char *stop, sopno startst, sopno stopst); */ static char * /* where it ended */ -slow(m, start, stop, startst, stopst) +slow(charset, m, start, stop, startst, stopst) +CHARSET_INFO *charset; register struct match *m; char *start; char *stop; @@ -767,12 +772,12 @@ sopno stopst; } /* how about a word boundary? 
*/ - if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && - (c != OUT && ISWORD(c)) ) { + if ( (flagch == BOL || (lastc != OUT && !ISWORD(charset,lastc))) && + (c != OUT && ISWORD(charset,c)) ) { flagch = BOW; } - if ( (lastc != OUT && ISWORD(lastc)) && - (flagch == EOL || (c != OUT && !ISWORD(c))) ) { + if ( (lastc != OUT && ISWORD(charset,lastc)) && + (flagch == EOL || (c != OUT && !ISWORD(charset,c))) ) { flagch = EOW; } if (flagch == BOW || flagch == EOW) { diff --git a/regex/engine.ih b/regex/engine.ih index c65733b5756..7cfcb39fb2d 100644 --- a/regex/engine.ih +++ b/regex/engine.ih @@ -4,11 +4,11 @@ extern "C" { #endif /* === engine.c === */ -static int matcher(register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags); -static char *dissect(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); -static char *backref(register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev); -static char *fast(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); -static char *slow(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); +static int matcher(CHARSET_INFO *charset,register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags); +static char *dissect(CHARSET_INFO *charset,register struct match *m, char *start, char *stop, sopno startst, sopno stopst); +static char *backref(CHARSET_INFO *charset, register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev); +static char *fast(CHARSET_INFO *charset, register struct match *m, char *start, char *stop, sopno startst, sopno stopst); +static char *slow(CHARSET_INFO *charset, register struct match *m, char *start, char *stop, sopno startst, sopno stopst); static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft); #define BOL (OUT+1) #define EOL (BOL+1) diff 
--git a/regex/main.c b/regex/main.c index 7844a4d8384..70d75e6ec50 100644 --- a/regex/main.c +++ b/regex/main.c @@ -74,7 +74,7 @@ char *argv[]; exit(status); } - err = regcomp(&re, argv[optind++], copts); + err = regcomp(&re, argv[optind++], copts, default_charset_info); if (err) { len = regerror(err, &re, erbuf, sizeof(erbuf)); fprintf(stderr, "error %s, %d/%d `%s'\n", @@ -226,7 +226,7 @@ int opts; /* may not match f1 */ strcpy(f0copy, f0); re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL; fixstr(f0copy); - err = regcomp(&re, f0copy, opts); + err = regcomp(&re, f0copy, opts, default_charset_info); if (err != 0 && (!opt('C', f1) || err != efind(f2))) { /* unexpected error or wrong error */ len = regerror(err, &re, erbuf, sizeof(erbuf)); diff --git a/regex/regcomp.c b/regex/regcomp.c index 6f8221a706d..8a4ebbdfe17 100644 --- a/regex/regcomp.c +++ b/regex/regcomp.c @@ -28,6 +28,7 @@ struct parse { # define NPAREN 10 /* we need to remember () 1-9 for back refs */ sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ sopno pend[NPAREN]; /* -> ) ([0] unused) */ + CHARSET_INFO *charset; /* for ctype things */ }; #include "regcomp.ih" @@ -99,10 +100,11 @@ static int never = 0; /* for use in asserts; shuts lint up */ = #define REG_DUMP 0200 */ int /* 0 success, otherwise REG_something */ -regcomp(preg, pattern, cflags) +regcomp(preg, pattern, cflags, charset) regex_t *preg; const char *pattern; int cflags; +CHARSET_INFO *charset; { struct parse pa; register struct re_guts *g; @@ -116,6 +118,7 @@ int cflags; #endif regex_init(); /* Init cclass if neaded */ + preg->charset=charset; cflags = GOODFLAGS(cflags); if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) return(REG_INVARG); @@ -146,6 +149,7 @@ int cflags; p->end = p->next + len; p->error = 0; p->ncsalloc = 0; + p->charset = preg->charset; for (i = 0; i < NPAREN; i++) { p->pbegin[i] = 0; p->pend[i] = 0; @@ -327,7 +331,7 @@ register struct parse *p; ordinary(p, c); break; case '{': /* okay as ordinary except if digit follows 
*/ - if(REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT)) {} + if(REQUIRE(!MORE() || !my_isdigit(p->charset,PEEK()), REG_BADRPT)) {} /* FALLTHROUGH */ default: ordinary(p, c); @@ -339,7 +343,8 @@ register struct parse *p; c = PEEK(); /* we call { a repetition if followed by a digit */ if (!( c == '*' || c == '+' || c == '?' || - (c == '{' && MORE2() && isdigit(PEEK2())) )) + (c == '{' && MORE2() && + my_isdigit(p->charset,PEEK2())) )) return; /* no repetition, we're done */ NEXT(); @@ -368,7 +373,7 @@ register struct parse *p; case '{': count = p_count(p); if (EAT(',')) { - if (isdigit(PEEK())) { + if (my_isdigit(p->charset,PEEK())) { count2 = p_count(p); if(REQUIRE(count <= count2, REG_BADBR)) {} } else /* single number with comma */ @@ -389,7 +394,8 @@ register struct parse *p; return; c = PEEK(); if (!( c == '*' || c == '+' || c == '?' || - (c == '{' && MORE2() && isdigit(PEEK2())) ) ) + (c == '{' && MORE2() && + my_isdigit(p->charset,PEEK2())) ) ) return; SETERROR(REG_BADRPT); } @@ -546,7 +552,7 @@ int starordinary; /* is a leading * an ordinary character? 
*/ } else if (EATTWO('\\', '{')) { count = p_count(p); if (EAT(',')) { - if (MORE() && isdigit(PEEK())) { + if (MORE() && my_isdigit(p->charset,PEEK())) { count2 = p_count(p); if(REQUIRE(count <= count2, REG_BADBR)) {} } else /* single number with comma */ @@ -577,7 +583,7 @@ register struct parse *p; register int count = 0; register int ndigits = 0; - while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { + while (MORE() && my_isdigit(p->charset,PEEK()) && count <= DUPMAX) { count = count*10 + (GETNEXT() - '0'); ndigits++; } @@ -632,8 +638,8 @@ register struct parse *p; register int ci; for (i = p->g->csetsize - 1; i >= 0; i--) - if (CHIN(cs, i) && isalpha(i)) { - ci = othercase(i); + if (CHIN(cs, i) && my_isalpha(p->charset,i)) { + ci = othercase(p->charset,i); if (ci != i) CHadd(cs, ci); } @@ -744,7 +750,7 @@ register cset *cs; register char *u; register char c; - while (MORE() && isalpha(PEEK())) + while (MORE() && my_isalpha(p->charset,PEEK())) NEXT(); len = p->next - sp; for (cp = cclasses; cp->name != NULL; cp++) @@ -837,14 +843,15 @@ int endc; /* name ended by endc,']' */ == static char othercase(int ch); */ static char /* if no counterpart, return ch */ -othercase(ch) +othercase(charset,ch) +CHARSET_INFO *charset; int ch; { - assert(isalpha(ch)); - if (isupper(ch)) - return(tolower(ch)); - else if (islower(ch)) - return(toupper(ch)); + assert(my_isalpha(charset,ch)); + if (my_isupper(charset,ch)) + return(my_tolower(charset,ch)); + else if (my_islower(charset,ch)) + return(my_toupper(charset,ch)); else /* peculiar, but could happen */ return(ch); } @@ -887,7 +894,8 @@ register int ch; { register cat_t *cap = p->g->categories; - if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch) + if ((p->g->cflags®_ICASE) && my_isalpha(p->charset,ch) && + othercase(p->charset,ch) != ch) bothcases(p, ch); else { EMIT(OCHAR, (unsigned char)ch); diff --git a/regex/regcomp.ih b/regex/regcomp.ih index 4ae45bbf4a9..32f1f6e89eb 100644 --- a/regex/regcomp.ih +++ 
b/regex/regcomp.ih @@ -16,7 +16,7 @@ static void p_b_cclass(register struct parse *p, register cset *cs); static void p_b_eclass(register struct parse *p, register cset *cs); static char p_b_symbol(register struct parse *p); static char p_b_coll_elem(register struct parse *p, int endc); -static char othercase(int ch); +static char othercase(CHARSET_INFO *charset,int ch); static void bothcases(register struct parse *p, int ch); static void ordinary(register struct parse *p, register int ch); static void nonnewline(register struct parse *p); diff --git a/regex/regex.h b/regex/regex.h index 99a0077251e..1602f66d190 100644 --- a/regex/regex.h +++ b/regex/regex.h @@ -5,6 +5,8 @@ extern "C" { #endif +#include "m_ctype.h" + /* === regex2.h === */ #ifdef _WIN64 typedef __int64 regoff_t; @@ -17,6 +19,7 @@ typedef struct { size_t re_nsub; /* number of parenthesized subexpressions */ const char *re_endp; /* end pointer for REG_PEND */ struct re_guts *re_g; /* none of your business :-) */ + CHARSET_INFO *charset; /* For ctype things */ } regex_t; typedef struct { regoff_t rm_so; /* start of match */ @@ -25,7 +28,7 @@ typedef struct { /* === regcomp.c === */ -extern int regcomp(regex_t *, const char *, int); +extern int regcomp(regex_t *, const char *, int, CHARSET_INFO *charset); #define REG_BASIC 0000 #define REG_EXTENDED 0001 #define REG_ICASE 0002 diff --git a/regex/regex2.h b/regex/regex2.h index 0d94baa310f..bba54ea2054 100644 --- a/regex/regex2.h +++ b/regex/regex2.h @@ -140,6 +140,6 @@ struct re_guts { /* misc utilities */ #undef OUT /* May be defined in windows */ #define OUT (CHAR_MAX+1) /* a non-character value */ -#define ISWORD(c) (isalnum(c) || (c) == '_') +#define ISWORD(s,c) (my_isalnum(s,c) || (c) == '_') #endif /* __regex2_h__ */ diff --git a/regex/regexec.c b/regex/regexec.c index 7f2704f8214..723289bd0ad 100644 --- a/regex/regexec.c +++ b/regex/regexec.c @@ -133,7 +133,7 @@ int eflags; if ((size_t) g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE)) 
- return(smatcher(g, (char *)str, nmatch, pmatch, eflags)); + return(smatcher(preg->charset, g, (char *)str, nmatch, pmatch, eflags)); else - return(lmatcher(g, (char *)str, nmatch, pmatch, eflags)); + return(lmatcher(preg->charset, g, (char *)str, nmatch, pmatch, eflags)); } diff --git a/regex/reginit.c b/regex/reginit.c index 18647c386fc..fe81f88a342 100644 --- a/regex/reginit.c +++ b/regex/reginit.c @@ -12,6 +12,7 @@ void regex_init() char buff[CCLASS_LAST][256]; int count[CCLASS_LAST]; uint i; + CHARSET_INFO *cs=default_charset_info; if (!regex_inited) { @@ -20,27 +21,27 @@ void regex_init() for (i=1 ; i<= 255; i++) { - if (isalnum(i)) + if (my_isalnum(cs,i)) buff[CCLASS_ALNUM][count[CCLASS_ALNUM]++]=(char) i; - if (isalpha(i)) + if (my_isalpha(cs,i)) buff[CCLASS_ALPHA][count[CCLASS_ALPHA]++]=(char) i; - if (iscntrl(i)) + if (my_iscntrl(cs,i)) buff[CCLASS_CNTRL][count[CCLASS_CNTRL]++]=(char) i; - if (isdigit(i)) + if (my_isdigit(cs,i)) buff[CCLASS_DIGIT][count[CCLASS_DIGIT]++]=(char) i; - if (isgraph(i)) + if (my_isgraph(cs,i)) buff[CCLASS_GRAPH][count[CCLASS_GRAPH]++]=(char) i; - if (islower(i)) + if (my_islower(cs,i)) buff[CCLASS_LOWER][count[CCLASS_LOWER]++]=(char) i; - if (isprint(i)) + if (my_isprint(cs,i)) buff[CCLASS_PRINT][count[CCLASS_PRINT]++]=(char) i; - if (ispunct(i)) + if (my_ispunct(cs,i)) buff[CCLASS_PUNCT][count[CCLASS_PUNCT]++]=(char) i; - if (isspace(i)) + if (my_isspace(cs,i)) buff[CCLASS_SPACE][count[CCLASS_SPACE]++]=(char) i; - if (isupper(i)) + if (my_isupper(cs,i)) buff[CCLASS_UPPER][count[CCLASS_UPPER]++]=(char) i; - if (isxdigit(i)) + if (my_isxdigit(cs,i)) buff[CCLASS_XDIGIT][count[CCLASS_XDIGIT]++]=(char) i; } buff[CCLASS_BLANK][0]=' '; diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 1de398177b5..a09c503e3d5 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -1278,7 +1278,8 @@ Item_func_regex::fix_fields(THD *thd,TABLE_LIST *tables) int error; if ((error=regcomp(&preg,res->c_ptr(), binary ? 
REG_EXTENDED | REG_NOSUB : - REG_EXTENDED | REG_NOSUB | REG_ICASE))) + REG_EXTENDED | REG_NOSUB | REG_ICASE, + default_charset_info))) { (void) regerror(error,&preg,buff,sizeof(buff)); my_printf_error(ER_REGEXP_ERROR,ER(ER_REGEXP_ERROR),MYF(0),buff); @@ -1325,7 +1326,8 @@ longlong Item_func_regex::val_int() } if (regcomp(&preg,res2->c_ptr(), binary ? REG_EXTENDED | REG_NOSUB : - REG_EXTENDED | REG_NOSUB | REG_ICASE)) + REG_EXTENDED | REG_NOSUB | REG_ICASE, + default_charset_info)) { null_value=1; |