diff options
Diffstat (limited to 'regex')
-rw-r--r-- | regex/Makefile.am | 2 | ||||
-rw-r--r-- | regex/cclass.h | 1 | ||||
-rw-r--r-- | regex/debug.c | 22 | ||||
-rw-r--r-- | regex/debug.ih | 4 | ||||
-rw-r--r-- | regex/engine.c | 97 | ||||
-rw-r--r-- | regex/engine.ih | 10 | ||||
-rw-r--r-- | regex/main.c | 4 | ||||
-rw-r--r-- | regex/regcomp.c | 99 | ||||
-rw-r--r-- | regex/regcomp.ih | 2 | ||||
-rw-r--r-- | regex/regex.h | 7 | ||||
-rw-r--r-- | regex/regex2.h | 2 | ||||
-rw-r--r-- | regex/regexec.c | 4 | ||||
-rw-r--r-- | regex/reginit.c | 24 |
13 files changed, 155 insertions, 123 deletions
diff --git a/regex/Makefile.am b/regex/Makefile.am index ee421b70bcf..2e23efcbf2a 100644 --- a/regex/Makefile.am +++ b/regex/Makefile.am @@ -15,7 +15,7 @@ # Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, # MA 02111-1307, USA -INCLUDES = @MT_INCLUDES@ -I$(srcdir)/../include -I../include +INCLUDES = @MT_INCLUDES@ -I$(top_srcdir)/include noinst_LIBRARIES = libregex.a LDADD = libregex.a ../strings/libmystrings.a noinst_HEADERS = cclass.h cname.h regex2.h utils.h engine.c regex.h diff --git a/regex/cclass.h b/regex/cclass.h index e0f752f38b8..b877b5dee6b 100644 --- a/regex/cclass.h +++ b/regex/cclass.h @@ -18,4 +18,5 @@ extern struct cclass { const char *name; const char *chars; const char *multis; + uint mask; } cclasses[]; diff --git a/regex/debug.c b/regex/debug.c index 35279941d48..bdd3e00d5a7 100644 --- a/regex/debug.c +++ b/regex/debug.c @@ -45,7 +45,7 @@ FILE *d; if (g->nplus > 0) fprintf(d, ", nplus %ld", (long)g->nplus); fprintf(d, "\n"); - s_print(g, d); + s_print(r->charset, g, d); for (i = 0; i < g->ncategories; i++) { nincat[i] = 0; for (c = CHAR_MIN; c <= CHAR_MAX; c++) @@ -58,7 +58,7 @@ FILE *d; for (c = CHAR_MIN; c <= CHAR_MAX; c++) if (g->categories[c] == i) break; - fprintf(d, ", %d=%s", i, regchar(c,buf)); + fprintf(d, ", %d=%s", i, regchar(r->charset,c,buf)); } fprintf(d, "\n"); for (i = 1; i < g->ncategories; i++) @@ -68,14 +68,14 @@ FILE *d; for (c = CHAR_MIN; c <= CHAR_MAX+1; c++) /* +1 does flush */ if (c <= CHAR_MAX && g->categories[c] == i) { if (last < 0) { - fprintf(d, "%s", regchar(c,buf)); + fprintf(d, "%s", regchar(r->charset,c,buf)); last = c; } } else { if (last >= 0) { if (last != c-1) fprintf(d, "-%s", - regchar(c-1,buf)); + regchar(r->charset,c-1,buf)); last = -1; } } @@ -88,7 +88,8 @@ FILE *d; == static void s_print(register struct re_guts *g, FILE *d); */ static void -s_print(g, d) +s_print(charset, g, d) +CHARSET_INFO *charset; register struct re_guts *g; FILE *d; { @@ -127,7 +128,7 @@ FILE *d; if 
(strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL) fprintf(d, "\\%c", (char)opnd); else - fprintf(d, "%s", regchar((char)opnd,buf)); + fprintf(d, "%s", regchar(charset,(char)opnd,buf)); break; case OBOL: fprintf(d, "^"); @@ -151,14 +152,14 @@ FILE *d; for (i = 0; i < g->csetsize+1; i++) /* +1 flushes */ if (CHIN(cs, i) && i < g->csetsize) { if (last < 0) { - fprintf(d, "%s", regchar(i,buf)); + fprintf(d, "%s", regchar(charset,i,buf)); last = i; } } else { if (last >= 0) { if (last != i-1) fprintf(d, "-%s", - regchar(i-1,buf)); + regchar(charset,i-1,buf)); last = -1; } } @@ -230,12 +231,13 @@ FILE *d; == static char *regchar(int ch); */ static char * /* -> representation */ -regchar(ch,buf) +regchar(charset,ch,buf) +CHARSET_INFO *charset; int ch; char *buf; { - if (isprint(ch) || ch == ' ') + if (my_isprint(charset,ch) || ch == ' ') sprintf(buf, "%c", ch); else sprintf(buf, "\\%o", ch); diff --git a/regex/debug.ih b/regex/debug.ih index 532ec5086e5..0d91e170437 100644 --- a/regex/debug.ih +++ b/regex/debug.ih @@ -5,8 +5,8 @@ extern "C" { /* === debug.c === */ void regprint(regex_t *r, FILE *d); -static void s_print(register struct re_guts *g, FILE *d); -static char *regchar(int ch,char *buf); +static void s_print(CHARSET_INFO *charset, register struct re_guts *g, FILE *d); +static char *regchar(CHARSET_INFO *charset, int ch,char *buf); #ifdef __cplusplus } diff --git a/regex/engine.c b/regex/engine.c index e77fbd7cf89..6734560b9bf 100644 --- a/regex/engine.c +++ b/regex/engine.c @@ -63,7 +63,8 @@ struct match { == size_t nmatch, regmatch_t pmatch[], int eflags); */ static int /* 0 success, REG_NOMATCH failure */ -matcher(g, str, nmatch, pmatch, eflags) +matcher(charset,g, str, nmatch, pmatch, eflags) +CHARSET_INFO *charset; register struct re_guts *g; char *str; size_t nmatch; @@ -120,7 +121,7 @@ int eflags; /* this loop does only one repetition except for backrefs */ for (;;) { - endp = fast(m, start, stop, gf, gl); + endp = fast(charset, m, start, stop, gf, gl); 
if (endp == NULL) { /* a miss */ if (m->pmatch != NULL) free((char *)m->pmatch); @@ -136,7 +137,7 @@ int eflags; assert(m->coldp != NULL); for (;;) { NOTE("finding start"); - endp = slow(m, m->coldp, stop, gf, gl); + endp = slow(charset, m, m->coldp, stop, gf, gl); if (endp != NULL) break; assert(m->coldp < m->endp); @@ -159,7 +160,7 @@ int eflags; m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; if (!g->backrefs && !(m->eflags&REG_BACKR)) { NOTE("dissecting"); - dp = dissect(m, m->coldp, endp, gf, gl); + dp = dissect(charset, m, m->coldp, endp, gf, gl); } else { if (g->nplus > 0 && m->lastpos == NULL) m->lastpos = (char **)malloc((g->nplus+1) * @@ -170,7 +171,7 @@ int eflags; return(REG_ESPACE); } NOTE("backref dissect"); - dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); + dp = backref(charset, m, m->coldp, endp, gf, gl, (sopno)0); } if (dp != NULL) break; @@ -182,7 +183,7 @@ int eflags; if (dp != NULL || endp <= m->coldp) break; /* defeat */ NOTE("backoff"); - endp = slow(m, m->coldp, endp-1, gf, gl); + endp = slow(charset, m, m->coldp, endp-1, gf, gl); if (endp == NULL) break; /* defeat */ /* try it on a shorter possibility */ @@ -193,7 +194,7 @@ int eflags; } #endif NOTE("backoff dissect"); - dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); + dp = backref(charset, m, m->coldp, endp, gf, gl, (sopno)0); } assert(dp == NULL || dp == endp); if (dp != NULL) /* found a shorter one */ @@ -235,7 +236,8 @@ int eflags; == char *stop, sopno startst, sopno stopst); */ static char * /* == stop (success) always */ -dissect(m, start, stop, startst, stopst) +dissect(charset, m, start, stop, startst, stopst) +CHARSET_INFO *charset; register struct match *m; char *start; char *stop; @@ -299,10 +301,10 @@ sopno stopst; stp = stop; for (;;) { /* how long could this one be? */ - rest = slow(m, sp, stp, ss, es); + rest = slow(charset, m, sp, stp, ss, es); assert(rest != NULL); /* it did match */ /* could the rest match the rest?
*/ - tail = slow(m, rest, stop, es, stopst); + tail = slow(charset, m, rest, stop, es, stopst); if (tail == stop) break; /* yes! */ /* no -- try a shorter match for this one */ @@ -312,8 +314,8 @@ sopno stopst; ssub = ss + 1; esub = es - 1; /* did innards match? */ - if (slow(m, sp, rest, ssub, esub) != NULL) { - dp = dissect(m, sp, rest, ssub, esub); + if (slow(charset, m, sp, rest, ssub, esub) != NULL) { + dp = dissect(charset, m, sp, rest, ssub, esub); assert(dp == rest); } else /* no */ assert(sp == rest); @@ -323,10 +325,10 @@ sopno stopst; stp = stop; for (;;) { /* how long could this one be? */ - rest = slow(m, sp, stp, ss, es); + rest = slow(charset, m, sp, stp, ss, es); assert(rest != NULL); /* it did match */ /* could the rest match the rest? */ - tail = slow(m, rest, stop, es, stopst); + tail = slow(charset, m, rest, stop, es, stopst); if (tail == stop) break; /* yes! */ /* no -- try a shorter match for this one */ @@ -338,7 +340,7 @@ sopno stopst; ssp = sp; oldssp = ssp; for (;;) { /* find last match of innards */ - sep = slow(m, ssp, rest, ssub, esub); + sep = slow(charset, m, ssp, rest, ssub, esub); if (sep == NULL || sep == ssp) break; /* failed or matched null */ oldssp = ssp; /* on to next try */ @@ -350,8 +352,8 @@ sopno stopst; ssp = oldssp; } assert(sep == rest); /* must exhaust substring */ - assert(slow(m, ssp, sep, ssub, esub) == rest); - dp = dissect(m, ssp, sep, ssub, esub); + assert(slow(charset, m, ssp, sep, ssub, esub) == rest); + dp = dissect(charset, m, ssp, sep, ssub, esub); assert(dp == sep); sp = rest; break; @@ -359,10 +361,10 @@ sopno stopst; stp = stop; for (;;) { /* how long could this one be? */ - rest = slow(m, sp, stp, ss, es); + rest = slow(charset, m, sp, stp, ss, es); assert(rest != NULL); /* it did match */ /* could the rest match the rest? */ - tail = slow(m, rest, stop, es, stopst); + tail = slow(charset, m, rest, stop, es, stopst); if (tail == stop) break; /* yes! 
*/ /* no -- try a shorter match for this one */ @@ -373,7 +375,7 @@ sopno stopst; esub = ss + OPND(m->g->strip[ss]) - 1; assert(OP(m->g->strip[esub]) == OOR1); for (;;) { /* find first matching branch */ - if (slow(m, sp, rest, ssub, esub) == rest) + if (slow(charset, m, sp, rest, ssub, esub) == rest) break; /* it matched all of it */ /* that one missed, try next one */ assert(OP(m->g->strip[esub]) == OOR1); @@ -386,7 +388,7 @@ sopno stopst; else assert(OP(m->g->strip[esub]) == O_CH); } - dp = dissect(m, sp, rest, ssub, esub); + dp = dissect(charset, m, sp, rest, ssub, esub); assert(dp == rest); sp = rest; break; @@ -423,7 +425,8 @@ sopno stopst; == char *stop, sopno startst, sopno stopst, sopno lev); */ static char * /* == stop (success) or NULL (failure) */ -backref(m, start, stop, startst, stopst, lev) +backref(charset,m, start, stop, startst, stopst, lev) +CHARSET_INFO *charset; register struct match *m; char *start; char *stop; @@ -486,8 +489,8 @@ sopno lev; /* PLUS nesting level */ (sp < m->endp && *(sp-1) == '\n' && (m->g->cflags&REG_NEWLINE)) || (sp > m->beginp && - !ISWORD(*(sp-1))) ) && - (sp < m->endp && ISWORD(*sp)) ) + !ISWORD(charset,*(sp-1))) ) && + (sp < m->endp && ISWORD(charset,*sp)) ) { /* yes */ } else return(NULL); @@ -496,8 +499,8 @@ sopno lev; /* PLUS nesting level */ if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) || (sp < m->endp && *sp == '\n' && (m->g->cflags&REG_NEWLINE)) || - (sp < m->endp && !ISWORD(*sp)) ) && - (sp > m->beginp && ISWORD(*(sp-1))) ) + (sp < m->endp && !ISWORD(charset,*sp)) ) && + (sp > m->beginp && ISWORD(charset,*(sp-1))) ) { /* yes */ } else return(NULL); @@ -543,28 +546,28 @@ sopno lev; /* PLUS nesting level */ return(NULL); while (m->g->strip[ss] != SOP(O_BACK, i)) ss++; - return(backref(m, sp+len, stop, ss+1, stopst, lev)); + return(backref(charset, m, sp+len, stop, ss+1, stopst, lev)); break; case OQUEST_: /* to null or not */ - dp = backref(m, sp, stop, ss+1, stopst, lev); + dp = backref(charset, m, sp, stop, ss+1,
stopst, lev); if (dp != NULL) return(dp); /* not */ - return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); + return(backref(charset, m, sp, stop, ss+OPND(s)+1, stopst, lev)); break; case OPLUS_: assert(m->lastpos != NULL); assert(lev+1 <= m->g->nplus); m->lastpos[lev+1] = sp; - return(backref(m, sp, stop, ss+1, stopst, lev+1)); + return(backref(charset, m, sp, stop, ss+1, stopst, lev+1)); break; case O_PLUS: if (sp == m->lastpos[lev]) /* last pass matched null */ - return(backref(m, sp, stop, ss+1, stopst, lev-1)); + return(backref(charset, m, sp, stop, ss+1, stopst, lev-1)); /* try another pass */ m->lastpos[lev] = sp; - dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); + dp = backref(charset, m, sp, stop, ss-OPND(s)+1, stopst, lev); if (dp == NULL) - return(backref(m, sp, stop, ss+1, stopst, lev-1)); + return(backref(charset, m, sp, stop, ss+1, stopst, lev-1)); else return(dp); break; @@ -573,7 +576,7 @@ sopno lev; /* PLUS nesting level */ esub = ss + OPND(s) - 1; assert(OP(m->g->strip[esub]) == OOR1); for (;;) { /* find first matching branch */ - dp = backref(m, sp, stop, ssub, esub, lev); + dp = backref(charset, m, sp, stop, ssub, esub, lev); if (dp != NULL) return(dp); /* that one missed, try next one */ @@ -594,7 +597,7 @@ sopno lev; /* PLUS nesting level */ assert(0 < i && i <= m->g->nsub); offsave = m->pmatch[i].rm_so; m->pmatch[i].rm_so = sp - m->offp; - dp = backref(m, sp, stop, ss+1, stopst, lev); + dp = backref(charset, m, sp, stop, ss+1, stopst, lev); if (dp != NULL) return(dp); m->pmatch[i].rm_so = offsave; @@ -605,7 +608,7 @@ sopno lev; /* PLUS nesting level */ assert(0 < i && i <= m->g->nsub); offsave = m->pmatch[i].rm_eo; m->pmatch[i].rm_eo = sp - m->offp; - dp = backref(m, sp, stop, ss+1, stopst, lev); + dp = backref(charset, m, sp, stop, ss+1, stopst, lev); if (dp != NULL) return(dp); m->pmatch[i].rm_eo = offsave; @@ -628,7 +631,8 @@ sopno lev; /* PLUS nesting level */ == char *stop, sopno startst, sopno stopst); */ static char * /* where 
tentative match ended, or NULL */ -fast(m, start, stop, startst, stopst) +fast(charset, m, start, stop, startst, stopst) +CHARSET_INFO *charset; register struct match *m; char *start; char *stop; @@ -678,12 +682,12 @@ sopno stopst; } /* how about a word boundary? */ - if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && - (c != OUT && ISWORD(c)) ) { + if ( (flagch == BOL || (lastc != OUT && !ISWORD(charset,lastc))) && + (c != OUT && ISWORD(charset,c)) ) { flagch = BOW; } - if ( (lastc != OUT && ISWORD(lastc)) && - (flagch == EOL || (c != OUT && !ISWORD(c))) ) { + if ( (lastc != OUT && ISWORD(charset,lastc)) && + (flagch == EOL || (c != OUT && !ISWORD(charset,c))) ) { flagch = EOW; } if (flagch == BOW || flagch == EOW) { @@ -719,7 +723,8 @@ sopno stopst; == char *stop, sopno startst, sopno stopst); */ static char * /* where it ended */ -slow(m, start, stop, startst, stopst) +slow(charset, m, start, stop, startst, stopst) +CHARSET_INFO *charset; register struct match *m; char *start; char *stop; @@ -767,12 +772,12 @@ sopno stopst; } /* how about a word boundary? 
*/ - if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && - (c != OUT && ISWORD(c)) ) { + if ( (flagch == BOL || (lastc != OUT && !ISWORD(charset,lastc))) && + (c != OUT && ISWORD(charset,c)) ) { flagch = BOW; } - if ( (lastc != OUT && ISWORD(lastc)) && - (flagch == EOL || (c != OUT && !ISWORD(c))) ) { + if ( (lastc != OUT && ISWORD(charset,lastc)) && + (flagch == EOL || (c != OUT && !ISWORD(charset,c))) ) { flagch = EOW; } if (flagch == BOW || flagch == EOW) { diff --git a/regex/engine.ih b/regex/engine.ih index c65733b5756..7cfcb39fb2d 100644 --- a/regex/engine.ih +++ b/regex/engine.ih @@ -4,11 +4,11 @@ extern "C" { #endif /* === engine.c === */ -static int matcher(register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags); -static char *dissect(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); -static char *backref(register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev); -static char *fast(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); -static char *slow(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); +static int matcher(CHARSET_INFO *charset,register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags); +static char *dissect(CHARSET_INFO *charset,register struct match *m, char *start, char *stop, sopno startst, sopno stopst); +static char *backref(CHARSET_INFO *charset, register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev); +static char *fast(CHARSET_INFO *charset, register struct match *m, char *start, char *stop, sopno startst, sopno stopst); +static char *slow(CHARSET_INFO *charset, register struct match *m, char *start, char *stop, sopno startst, sopno stopst); static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft); #define BOL (OUT+1) #define EOL (BOL+1) diff 
--git a/regex/main.c b/regex/main.c index 7844a4d8384..f43a112a661 100644 --- a/regex/main.c +++ b/regex/main.c @@ -74,7 +74,7 @@ char *argv[]; exit(status); } - err = regcomp(&re, argv[optind++], copts); + err = regcomp(&re, argv[optind++], copts, &my_charset_latin1); if (err) { len = regerror(err, &re, erbuf, sizeof(erbuf)); fprintf(stderr, "error %s, %d/%d `%s'\n", @@ -226,7 +226,7 @@ int opts; /* may not match f1 */ strcpy(f0copy, f0); re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL; fixstr(f0copy); - err = regcomp(&re, f0copy, opts); + err = regcomp(&re, f0copy, opts, &my_charset_latin1); if (err != 0 && (!opt('C', f1) || err != efind(f2))) { /* unexpected error or wrong error */ len = regerror(err, &re, erbuf, sizeof(erbuf)); diff --git a/regex/regcomp.c b/regex/regcomp.c index 6f8221a706d..6af99456c28 100644 --- a/regex/regcomp.c +++ b/regex/regcomp.c @@ -28,6 +28,7 @@ struct parse { # define NPAREN 10 /* we need to remember () 1-9 for back refs */ sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ sopno pend[NPAREN]; /* -> ) ([0] unused) */ + CHARSET_INFO *charset; /* for ctype things */ }; #include "regcomp.ih" @@ -35,19 +36,19 @@ struct parse { static char nuls[10]; /* place to point scanner in event of error */ struct cclass cclasses[CCLASS_LAST+1]= { - { "alnum", "","" }, - { "alpha", "","" }, - { "blank", "","" }, - { "cntrl", "","" }, - { "digit", "","" }, - { "graph", "","" }, - { "lower", "","" }, - { "print", "","" }, - { "punct", "","" }, - { "space", "","" }, - { "upper", "","" }, - { "xdigit", "","" }, - { NULL,NULL,NULL } + { "alnum", "","", _U | _L | _NMR}, + { "alpha", "","", _U | _L }, + { "blank", "","", _B }, + { "cntrl", "","", _CTR }, + { "digit", "","", _NMR }, + { "graph", "","", _PNT | _U | _L | _NMR}, + { "lower", "","", _L }, + { "print", "","", _PNT | _U | _L | _NMR | _B }, + { "punct", "","", _PNT }, + { "space", "","", _SPC }, + { "upper", "","", _U }, + { "xdigit", "","", _X }, + { NULL,NULL,NULL, 0 } }; /* @@ -99,10
+100,11 @@ static int never = 0; /* for use in asserts; shuts lint up */ = #define REG_DUMP 0200 */ int /* 0 success, otherwise REG_something */ -regcomp(preg, pattern, cflags) +regcomp(preg, pattern, cflags, charset) regex_t *preg; const char *pattern; int cflags; +CHARSET_INFO *charset; { struct parse pa; register struct re_guts *g; @@ -115,7 +117,8 @@ int cflags; # define GOODFLAGS(f) ((f)&~REG_DUMP) #endif - regex_init(); /* Init cclass if neaded */ + regex_init(charset); /* Init cclass if neaded */ + preg->charset=charset; cflags = GOODFLAGS(cflags); if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC)) return(REG_INVARG); @@ -146,6 +149,7 @@ int cflags; p->end = p->next + len; p->error = 0; p->ncsalloc = 0; + p->charset = preg->charset; for (i = 0; i < NPAREN; i++) { p->pbegin[i] = 0; p->pend[i] = 0; @@ -327,7 +331,7 @@ register struct parse *p; ordinary(p, c); break; case '{': /* okay as ordinary except if digit follows */ - if(REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT)) {} + if(REQUIRE(!MORE() || !my_isdigit(p->charset,PEEK()), REG_BADRPT)) {} /* FALLTHROUGH */ default: ordinary(p, c); @@ -339,7 +343,8 @@ register struct parse *p; c = PEEK(); /* we call { a repetition if followed by a digit */ if (!( c == '*' || c == '+' || c == '?' || - (c == '{' && MORE2() && isdigit(PEEK2())) )) + (c == '{' && MORE2() && + my_isdigit(p->charset,PEEK2())) )) return; /* no repetition, we're done */ NEXT(); @@ -368,7 +373,7 @@ register struct parse *p; case '{': count = p_count(p); if (EAT(',')) { - if (isdigit(PEEK())) { + if (my_isdigit(p->charset,PEEK())) { count2 = p_count(p); if(REQUIRE(count <= count2, REG_BADBR)) {} } else /* single number with comma */ @@ -389,7 +394,8 @@ register struct parse *p; return; c = PEEK(); if (!( c == '*' || c == '+' || c == '?'
|| - (c == '{' && MORE2() && isdigit(PEEK2())) ) ) + (c == '{' && MORE2() && + my_isdigit(p->charset,PEEK2())) ) ) return; SETERROR(REG_BADRPT); } @@ -546,7 +552,7 @@ int starordinary; /* is a leading * an ordinary character? */ } else if (EATTWO('\\', '{')) { count = p_count(p); if (EAT(',')) { - if (MORE() && isdigit(PEEK())) { + if (MORE() && my_isdigit(p->charset,PEEK())) { count2 = p_count(p); if(REQUIRE(count <= count2, REG_BADBR)) {} } else /* single number with comma */ @@ -577,7 +583,7 @@ register struct parse *p; register int count = 0; register int ndigits = 0; - while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { + while (MORE() && my_isdigit(p->charset,PEEK()) && count <= DUPMAX) { count = count*10 + (GETNEXT() - '0'); ndigits++; } @@ -632,8 +638,8 @@ register struct parse *p; register int ci; for (i = p->g->csetsize - 1; i >= 0; i--) - if (CHIN(cs, i) && isalpha(i)) { - ci = othercase(i); + if (CHIN(cs, i) && my_isalpha(p->charset,i)) { + ci = othercase(p->charset,i); if (ci != i) CHadd(cs, ci); } @@ -741,10 +747,8 @@ register cset *cs; register char *sp = p->next; register struct cclass *cp; register size_t len; - register char *u; - register char c; - - while (MORE() && isalpha(PEEK())) + + while (MORE() && my_isalpha(p->charset,PEEK())) NEXT(); len = p->next - sp; for (cp = cclasses; cp->name != NULL; cp++) @@ -756,11 +760,26 @@ register cset *cs; return; } - u = (char*) cp->chars; - while ((c = *u++) != '\0') - CHadd(cs, c); - for (u = (char*) cp->multis; *u != '\0'; u += strlen(u) + 1) - MCadd(p, cs, u); +#ifndef USE_ORIG_REGEX_CODE + { + register size_t i; + for (i=1 ; i<256 ; i++) + if (p->charset->ctype[i+1] & cp->mask) + CHadd(cs, i); + } +#else + { + register char *u = (char*) cp->chars; + register char c; + + while ((c = *u++) != '\0') + CHadd(cs, c); + + for (u = (char*) cp->multis; *u != '\0'; u += strlen(u) + 1) + MCadd(p, cs, u); + } +#endif + } /* @@ -837,14 +856,15 @@ int endc; /* name ended by endc,']' */ == static char 
othercase(int ch); */ static char /* if no counterpart, return ch */ -othercase(ch) +othercase(charset,ch) +CHARSET_INFO *charset; int ch; { - assert(isalpha(ch)); - if (isupper(ch)) - return(tolower(ch)); - else if (islower(ch)) - return(toupper(ch)); + assert(my_isalpha(charset,ch)); + if (my_isupper(charset,ch)) + return(my_tolower(charset,ch)); + else if (my_islower(charset,ch)) + return(my_toupper(charset,ch)); else /* peculiar, but could happen */ return(ch); } @@ -887,7 +907,8 @@ register int ch; { register cat_t *cap = p->g->categories; - if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch) + if ((p->g->cflags&REG_ICASE) && my_isalpha(p->charset,ch) && + othercase(p->charset,ch) != ch) bothcases(p, ch); else { EMIT(OCHAR, (unsigned char)ch); diff --git a/regex/regcomp.ih b/regex/regcomp.ih index 4ae45bbf4a9..32f1f6e89eb 100644 --- a/regex/regcomp.ih +++ b/regex/regcomp.ih @@ -16,7 +16,7 @@ static void p_b_cclass(register struct parse *p, register cset *cs); static void p_b_eclass(register struct parse *p, register cset *cs); static char p_b_symbol(register struct parse *p); static char p_b_coll_elem(register struct parse *p, int endc); -static char othercase(int ch); +static char othercase(CHARSET_INFO *charset,int ch); static void bothcases(register struct parse *p, int ch); static void ordinary(register struct parse *p, register int ch); static void nonnewline(register struct parse *p); diff --git a/regex/regex.h b/regex/regex.h index 99a0077251e..e0fb0c77dc9 100644 --- a/regex/regex.h +++ b/regex/regex.h @@ -5,6 +5,8 @@ extern "C" { #endif +#include "m_ctype.h" + /* === regex2.h === */ #ifdef _WIN64 typedef __int64 regoff_t; @@ -17,6 +19,7 @@ typedef struct { size_t re_nsub; /* number of parenthesized subexpressions */ const char *re_endp; /* end pointer for REG_PEND */ struct re_guts *re_g; /* none of your business :-) */ + CHARSET_INFO *charset; /* For ctype things */ } regex_t; typedef struct { regoff_t rm_so; /* start of match */ @@ -25,7
+28,7 @@ typedef struct { /* === regcomp.c === */ -extern int regcomp(regex_t *, const char *, int); +extern int regcomp(regex_t *, const char *, int, CHARSET_INFO *charset); #define REG_BASIC 0000 #define REG_EXTENDED 0001 #define REG_ICASE 0002 @@ -73,7 +76,7 @@ extern void regfree(regex_t *); /* === reginit.c === */ -extern void regex_init(void); /* Should be called for multithread progs */ +extern void regex_init(CHARSET_INFO *cs); /* Should be called for multithread progs */ extern void regex_end(void); /* If one wants a clean end */ #ifdef __cplusplus diff --git a/regex/regex2.h b/regex/regex2.h index 0d94baa310f..bba54ea2054 100644 --- a/regex/regex2.h +++ b/regex/regex2.h @@ -140,6 +140,6 @@ struct re_guts { /* misc utilities */ #undef OUT /* May be defined in windows */ #define OUT (CHAR_MAX+1) /* a non-character value */ -#define ISWORD(c) (isalnum(c) || (c) == '_') +#define ISWORD(s,c) (my_isalnum(s,c) || (c) == '_') #endif /* __regex2_h__ */ diff --git a/regex/regexec.c b/regex/regexec.c index 7f2704f8214..723289bd0ad 100644 --- a/regex/regexec.c +++ b/regex/regexec.c @@ -133,7 +133,7 @@ int eflags; if ((size_t) g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE)) - return(smatcher(g, (char *)str, nmatch, pmatch, eflags)); + return(smatcher(preg->charset, g, (char *)str, nmatch, pmatch, eflags)); else - return(lmatcher(g, (char *)str, nmatch, pmatch, eflags)); + return(lmatcher(preg->charset, g, (char *)str, nmatch, pmatch, eflags)); } diff --git a/regex/reginit.c b/regex/reginit.c index 309685fadf2..74ad3dc6de4 100644 --- a/regex/reginit.c +++ b/regex/reginit.c @@ -7,7 +7,7 @@ static bool regex_inited=0; -void regex_init() +void regex_init(CHARSET_INFO *cs) { char buff[CCLASS_LAST][256]; int count[CCLASS_LAST]; @@ -20,27 +20,27 @@ void regex_init() for (i=1 ; i<= 255; i++) { - if (isalnum(i)) + if (my_isalnum(cs,i)) buff[CCLASS_ALNUM][count[CCLASS_ALNUM]++]=(char) i; - if (isalpha(i)) + if (my_isalpha(cs,i))
buff[CCLASS_ALPHA][count[CCLASS_ALPHA]++]=(char) i; - if (iscntrl(i)) + if (my_iscntrl(cs,i)) buff[CCLASS_CNTRL][count[CCLASS_CNTRL]++]=(char) i; - if (isdigit(i)) + if (my_isdigit(cs,i)) buff[CCLASS_DIGIT][count[CCLASS_DIGIT]++]=(char) i; - if (isgraph(i)) + if (my_isgraph(cs,i)) buff[CCLASS_GRAPH][count[CCLASS_GRAPH]++]=(char) i; - if (islower(i)) + if (my_islower(cs,i)) buff[CCLASS_LOWER][count[CCLASS_LOWER]++]=(char) i; - if (isprint(i)) + if (my_isprint(cs,i)) buff[CCLASS_PRINT][count[CCLASS_PRINT]++]=(char) i; - if (ispunct(i)) + if (my_ispunct(cs,i)) buff[CCLASS_PUNCT][count[CCLASS_PUNCT]++]=(char) i; - if (isspace(i)) + if (my_isspace(cs,i)) buff[CCLASS_SPACE][count[CCLASS_SPACE]++]=(char) i; - if (isupper(i)) + if (my_isupper(cs,i)) buff[CCLASS_UPPER][count[CCLASS_UPPER]++]=(char) i; - if (isxdigit(i)) + if (my_isxdigit(cs,i)) buff[CCLASS_XDIGIT][count[CCLASS_XDIGIT]++]=(char) i; } buff[CCLASS_BLANK][0]=' '; |