summaryrefslogtreecommitdiff
path: root/pcre_study.c
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-09-10 11:02:48 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-09-10 11:02:48 +0000
commit 7a3e01c442328741902c3c4b974d0e3024f645d1 (patch)
tree e61b4534e91f97df907571a92db57623422e71d7 /pcre_study.c
parent 640564bfeb0860032d98439dfa9f5585af59a09e (diff)
download pcre-7a3e01c442328741902c3c4b974d0e3024f645d1.tar.gz
General spring-clean of EBCDIC-related issues in the code and the
documentation, both of which had decayed over time. Added one test that can be
run in an ASCII world to do a little testing of EBCDIC-related things.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1033 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_study.c')
-rw-r--r-- pcre_study.c 81
1 files changed, 46 insertions, 35 deletions
diff --git a/pcre_study.c b/pcre_study.c
index 0359e45..805c28f 100644
--- a/pcre_study.c
+++ b/pcre_study.c
@@ -567,15 +567,15 @@ if (utf && c > 127)
#endif /* Not SUPPORT_UCP */
return p;
}
-#else /* Not SUPPORT_UTF */
+#else /* Not SUPPORT_UTF */
(void)(utf); /* Stops warning for unused parameter */
-#endif
+#endif /* SUPPORT_UTF */
/* Not UTF-8 mode, or character is less than 127. */
if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
return p + 1;
-#endif
+#endif /* COMPILE_PCRE8 */
#ifdef COMPILE_PCRE16
if (c > 0xff)
@@ -597,10 +597,12 @@ if (utf && c > 127)
c = 0xff;
SET_BIT(c);
}
-#endif
+#endif /* SUPPORT_UCP */
return p;
}
-#endif
+#else /* Not SUPPORT_UTF */
+(void)(utf); /* Stops warning for unused parameter */
+#endif /* SUPPORT_UTF */
if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
return p + 1;
@@ -988,8 +990,8 @@ do
identical. */
case OP_HSPACE:
- SET_BIT(0x09);
- SET_BIT(0x20);
+ SET_BIT(CHAR_HT);
+ SET_BIT(CHAR_SPACE);
#ifdef SUPPORT_UTF
if (utf)
{
@@ -998,45 +1000,47 @@ do
SET_BIT(0xE1); /* For U+1680, U+180E */
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */
SET_BIT(0xE3); /* For U+3000 */
-#endif
+#endif /* COMPILE_PCRE8 */
#ifdef COMPILE_PCRE16
SET_BIT(0xA0);
SET_BIT(0xFF); /* For characters > 255 */
-#endif
+#endif /* COMPILE_PCRE16 */
}
else
#endif /* SUPPORT_UTF */
{
+#ifndef EBCDIC
SET_BIT(0xA0);
+#endif /* Not EBCDIC */
#ifdef COMPILE_PCRE16
SET_BIT(0xFF); /* For characters > 255 */
-#endif
+#endif /* COMPILE_PCRE16 */
}
try_next = FALSE;
break;
case OP_ANYNL:
case OP_VSPACE:
- SET_BIT(0x0A);
- SET_BIT(0x0B);
- SET_BIT(0x0C);
- SET_BIT(0x0D);
+ SET_BIT(CHAR_LF);
+ SET_BIT(CHAR_VT);
+ SET_BIT(CHAR_FF);
+ SET_BIT(CHAR_CR);
#ifdef SUPPORT_UTF
if (utf)
{
#ifdef COMPILE_PCRE8
SET_BIT(0xC2); /* For U+0085 */
SET_BIT(0xE2); /* For U+2028, U+2029 */
-#endif
+#endif /* COMPILE_PCRE8 */
#ifdef COMPILE_PCRE16
- SET_BIT(0x85);
+ SET_BIT(CHAR_NEL);
SET_BIT(0xFF); /* For characters > 255 */
-#endif
+#endif /* COMPILE_PCRE16 */
}
else
#endif /* SUPPORT_UTF */
{
- SET_BIT(0x85);
+ SET_BIT(CHAR_NEL);
#ifdef COMPILE_PCRE16
SET_BIT(0xFF); /* For characters > 255 */
#endif
@@ -1060,7 +1064,8 @@ do
break;
/* The cbit_space table has vertical tab as whitespace; we have to
- ensure it is set as not whitespace. */
+ ensure it is set as not whitespace. Luckily, the code value is the same
+ (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate bit. */
case OP_NOT_WHITESPACE:
set_nottype_bits(start_bits, cbit_space, table_limit, cd);
@@ -1068,8 +1073,9 @@ do
try_next = FALSE;
break;
- /* The cbit_space table has vertical tab as whitespace; we have to
- not set it from the table. */
+ /* The cbit_space table has vertical tab as whitespace; we have to not
+ set it from the table. Luckily, the code value is the same (0x0b) in
+ ASCII and EBCDIC, so we can just adjust the appropriate bit. */
case OP_WHITESPACE:
c = start_bits[1]; /* Save in case it was already set */
@@ -1123,8 +1129,8 @@ do
return SSB_FAIL;
case OP_HSPACE:
- SET_BIT(0x09);
- SET_BIT(0x20);
+ SET_BIT(CHAR_HT);
+ SET_BIT(CHAR_SPACE);
#ifdef SUPPORT_UTF
if (utf)
{
@@ -1133,38 +1139,40 @@ do
SET_BIT(0xE1); /* For U+1680, U+180E */
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */
SET_BIT(0xE3); /* For U+3000 */
-#endif
+#endif /* COMPILE_PCRE8 */
#ifdef COMPILE_PCRE16
SET_BIT(0xA0);
SET_BIT(0xFF); /* For characters > 255 */
-#endif
+#endif /* COMPILE_PCRE16 */
}
else
#endif /* SUPPORT_UTF */
+#ifndef EBCDIC
SET_BIT(0xA0);
+#endif /* Not EBCDIC */
break;
case OP_ANYNL:
case OP_VSPACE:
- SET_BIT(0x0A);
- SET_BIT(0x0B);
- SET_BIT(0x0C);
- SET_BIT(0x0D);
+ SET_BIT(CHAR_LF);
+ SET_BIT(CHAR_VT);
+ SET_BIT(CHAR_FF);
+ SET_BIT(CHAR_CR);
#ifdef SUPPORT_UTF
if (utf)
{
#ifdef COMPILE_PCRE8
SET_BIT(0xC2); /* For U+0085 */
SET_BIT(0xE2); /* For U+2028, U+2029 */
-#endif
+#endif /* COMPILE_PCRE8 */
#ifdef COMPILE_PCRE16
- SET_BIT(0x85);
+ SET_BIT(CHAR_NEL);
SET_BIT(0xFF); /* For characters > 255 */
-#endif
+#endif /* COMPILE_PCRE16 */
}
else
#endif /* SUPPORT_UTF */
- SET_BIT(0x85);
+ SET_BIT(CHAR_NEL);
break;
case OP_NOT_DIGIT:
@@ -1176,7 +1184,9 @@ do
break;
/* The cbit_space table has vertical tab as whitespace; we have to
- ensure it gets set as not whitespace. */
+ ensure it gets set as not whitespace. Luckily, the code value is the
+ same (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate
+ bit. */
case OP_NOT_WHITESPACE:
set_nottype_bits(start_bits, cbit_space, table_limit, cd);
@@ -1184,7 +1194,8 @@ do
break;
/* The cbit_space table has vertical tab as whitespace; we have to
- avoid setting it. */
+ avoid setting it. Luckily, the code value is the same (0x0b) in ASCII
+ and EBCDIC, so we can just adjust the appropriate bit. */
case OP_WHITESPACE:
c = start_bits[1]; /* Save in case it was already set */