summary | refs | log | tree | commit | diff
path: root/regexec.c
diff options
context:
space:
mode:
author: Andy Lester <andy@petdance.com> 2005-06-07 19:03:32 -0500
committer: Rafael Garcia-Suarez <rgarciasuarez@gmail.com> 2005-06-08 09:19:18 +0000
commit: 1a4fad37125bac3e69c02ee6b2e57c14777484e6 (patch)
tree: 6e9aad016aba50031a257b61ec24b927e144decc /regexec.c
parent: 942068f5eb2e4323f0443fdb27a222fde20b6e6d (diff)
download: perl-1a4fad37125bac3e69c02ee6b2e57c14777484e6.tar.gz
regexec.c utf8 loading
Message-ID: <20050608050332.GA31059@petdance.com> p4raw-id: //depot/perl@24754
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 51
1 files changed, 28 insertions, 23 deletions
diff --git a/regexec.c b/regexec.c
index e727ba9c3b..e740690606 100644
--- a/regexec.c
+++ b/regexec.c
@@ -140,7 +140,12 @@
#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
-#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) { ENTER; save_re_context(); (void)CAT2(is_utf8_, a)((const U8*)b); LEAVE; } } STMT_END
+#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { /* when PL_utf8_<class> is still unset, probe is_utf8_<class>() with the sample string str; presumably that call populates PL_utf8_<class> -- TODO confirm */ \
+ if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END /* assert(ok): the sample str is expected to test true for the class */
+#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a") /* sample: the letter 'a' */
+#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0") /* sample: the digit '0' */
+#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ") /* sample: an ASCII space */
+#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86") /* sample: bytes CD 86, the UTF-8 encoding of U+0346 (Combining Diacritical Marks block) */
/* for use after a quantifier and before an EXACT-like node -- japhy */
#define JUMPABLE(rn) ( \
@@ -1189,7 +1194,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32
}
tmp = ((OP(c) == BOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
- LOAD_UTF8_CHARCLASS(alnum,"a");
+ LOAD_UTF8_CHARCLASS_ALNUM();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (tmp == !(OP(c) == BOUND ?
swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
@@ -1232,7 +1237,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32
}
tmp = ((OP(c) == NBOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
- LOAD_UTF8_CHARCLASS(alnum,"a");
+ LOAD_UTF8_CHARCLASS_ALNUM();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (tmp == !(OP(c) == NBOUND ?
swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
@@ -1261,7 +1266,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32
break;
case ALNUM:
if (do_utf8) {
- LOAD_UTF8_CHARCLASS(alnum,"a");
+ LOAD_UTF8_CHARCLASS_ALNUM();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
@@ -1319,7 +1324,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32
break;
case NALNUM:
if (do_utf8) {
- LOAD_UTF8_CHARCLASS(alnum,"a");
+ LOAD_UTF8_CHARCLASS_ALNUM();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
@@ -1377,7 +1382,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32
break;
case SPACE:
if (do_utf8) {
- LOAD_UTF8_CHARCLASS(space," ");
+ LOAD_UTF8_CHARCLASS_SPACE();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
@@ -1435,7 +1440,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32
break;
case NSPACE:
if (do_utf8) {
- LOAD_UTF8_CHARCLASS(space," ");
+ LOAD_UTF8_CHARCLASS_SPACE();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
if (tmp && (norun || regtry(prog, s)))
@@ -1493,7 +1498,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32
break;
case DIGIT:
if (do_utf8) {
- LOAD_UTF8_CHARCLASS(digit,"0");
+ LOAD_UTF8_CHARCLASS_DIGIT();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
@@ -1551,7 +1556,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32
break;
case NDIGIT:
if (do_utf8) {
- LOAD_UTF8_CHARCLASS(digit,"0");
+ LOAD_UTF8_CHARCLASS_DIGIT();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
@@ -2929,7 +2934,7 @@ S_regmatch(pTHX_ regnode *prog)
if (!nextchr)
sayNO;
if (do_utf8) {
- LOAD_UTF8_CHARCLASS(alnum,"a");
+ LOAD_UTF8_CHARCLASS_ALNUM();
if (!(OP(scan) == ALNUM
? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
: isALNUM_LC_utf8((U8*)locinput)))
@@ -2952,7 +2957,7 @@ S_regmatch(pTHX_ regnode *prog)
if (!nextchr && locinput >= PL_regeol)
sayNO;
if (do_utf8) {
- LOAD_UTF8_CHARCLASS(alnum,"a");
+ LOAD_UTF8_CHARCLASS_ALNUM();
if (OP(scan) == NALNUM
? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
: isALNUM_LC_utf8((U8*)locinput))
@@ -2985,7 +2990,7 @@ S_regmatch(pTHX_ regnode *prog)
}
if (OP(scan) == BOUND || OP(scan) == NBOUND) {
ln = isALNUM_uni(ln);
- LOAD_UTF8_CHARCLASS(alnum,"a");
+ LOAD_UTF8_CHARCLASS_ALNUM();
n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
}
else {
@@ -3017,7 +3022,7 @@ S_regmatch(pTHX_ regnode *prog)
sayNO;
if (do_utf8) {
if (UTF8_IS_CONTINUED(nextchr)) {
- LOAD_UTF8_CHARCLASS(space," ");
+ LOAD_UTF8_CHARCLASS_SPACE();
if (!(OP(scan) == SPACE
? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
: isSPACE_LC_utf8((U8*)locinput)))
@@ -3047,7 +3052,7 @@ S_regmatch(pTHX_ regnode *prog)
if (!nextchr && locinput >= PL_regeol)
sayNO;
if (do_utf8) {
- LOAD_UTF8_CHARCLASS(space," ");
+ LOAD_UTF8_CHARCLASS_SPACE();
if (OP(scan) == NSPACE
? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
: isSPACE_LC_utf8((U8*)locinput))
@@ -3070,7 +3075,7 @@ S_regmatch(pTHX_ regnode *prog)
if (!nextchr)
sayNO;
if (do_utf8) {
- LOAD_UTF8_CHARCLASS(digit,"0");
+ LOAD_UTF8_CHARCLASS_DIGIT();
if (!(OP(scan) == DIGIT
? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
: isDIGIT_LC_utf8((U8*)locinput)))
@@ -3093,7 +3098,7 @@ S_regmatch(pTHX_ regnode *prog)
if (!nextchr && locinput >= PL_regeol)
sayNO;
if (do_utf8) {
- LOAD_UTF8_CHARCLASS(digit,"0");
+ LOAD_UTF8_CHARCLASS_DIGIT();
if (OP(scan) == NDIGIT
? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
: isDIGIT_LC_utf8((U8*)locinput))
@@ -3113,7 +3118,7 @@ S_regmatch(pTHX_ regnode *prog)
if (locinput >= PL_regeol)
sayNO;
if (do_utf8) {
- LOAD_UTF8_CHARCLASS(mark,"~");
+ LOAD_UTF8_CHARCLASS_MARK();
if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
sayNO;
locinput += PL_utf8skip[nextchr];
@@ -4437,7 +4442,7 @@ S_regrepeat(pTHX_ const regnode *p, I32 max)
case ALNUM:
if (do_utf8) {
loceol = PL_regeol;
- LOAD_UTF8_CHARCLASS(alnum,"a");
+ LOAD_UTF8_CHARCLASS_ALNUM();
while (hardcount < max && scan < loceol &&
swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
scan += UTF8SKIP(scan);
@@ -4465,7 +4470,7 @@ S_regrepeat(pTHX_ const regnode *p, I32 max)
case NALNUM:
if (do_utf8) {
loceol = PL_regeol;
- LOAD_UTF8_CHARCLASS(alnum,"a");
+ LOAD_UTF8_CHARCLASS_ALNUM();
while (hardcount < max && scan < loceol &&
!swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
scan += UTF8SKIP(scan);
@@ -4493,7 +4498,7 @@ S_regrepeat(pTHX_ const regnode *p, I32 max)
case SPACE:
if (do_utf8) {
loceol = PL_regeol;
- LOAD_UTF8_CHARCLASS(space," ");
+ LOAD_UTF8_CHARCLASS_SPACE();
while (hardcount < max && scan < loceol &&
(*scan == ' ' ||
swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
@@ -4522,7 +4527,7 @@ S_regrepeat(pTHX_ const regnode *p, I32 max)
case NSPACE:
if (do_utf8) {
loceol = PL_regeol;
- LOAD_UTF8_CHARCLASS(space," ");
+ LOAD_UTF8_CHARCLASS_SPACE();
while (hardcount < max && scan < loceol &&
!(*scan == ' ' ||
swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
@@ -4551,7 +4556,7 @@ S_regrepeat(pTHX_ const regnode *p, I32 max)
case DIGIT:
if (do_utf8) {
loceol = PL_regeol;
- LOAD_UTF8_CHARCLASS(digit,"0");
+ LOAD_UTF8_CHARCLASS_DIGIT();
while (hardcount < max && scan < loceol &&
swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
scan += UTF8SKIP(scan);
@@ -4565,7 +4570,7 @@ S_regrepeat(pTHX_ const regnode *p, I32 max)
case NDIGIT:
if (do_utf8) {
loceol = PL_regeol;
- LOAD_UTF8_CHARCLASS(digit,"0");
+ LOAD_UTF8_CHARCLASS_DIGIT();
while (hardcount < max && scan < loceol &&
!swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
scan += UTF8SKIP(scan);