summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2015-06-12 16:25:23 +0000
committer ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2015-06-12 16:25:23 +0000
commit 80cacc7c366941eb206046126dd87939d1469d1e (patch)
tree f2fbb6e0c5402268077f70cb7a700907139e2aea
parent b538e80fa39a55ecc0b38e46fabc9ecc5bee168e (diff)
download pcre2-80cacc7c366941eb206046126dd87939d1469d1e.tar.gz
Fix \a and \e in pcre2test, and \a in pcre2_compile, on EBCDIC platforms.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@284 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r-- ChangeLog 6
-rw-r--r-- src/pcre2_compile.c 4
-rw-r--r-- src/pcre2_internal.h 68
-rw-r--r-- src/pcre2test.c 4
4 files changed, 48 insertions, 34 deletions
diff --git a/ChangeLog b/ChangeLog
index 37b565d..766f6b2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -155,6 +155,12 @@ being treated as a literal 'l' instead of causing an error.
an empty string was repeated, it was not identified as matching an empty string
itself. For example: /^(?:(?(1)x|)+)+$()/.
+40. In an EBCDIC environment, pcre2test was mishandling the escape sequences
+\a and \e in test subject lines.
+
+41. In an EBCDIC environment, \a in a pattern was converted to the ASCII
+value instead of the EBCDIC value.
+
Version 10.10 06-March-2015
---------------------------
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 80c2d08..9ad36d0 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -296,7 +296,7 @@ static const short int escapes[] = {
-ESC_Z, CHAR_LEFT_SQUARE_BRACKET,
CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET,
CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE,
- CHAR_GRAVE_ACCENT, 7,
+ CHAR_GRAVE_ACCENT, ESC_a,
-ESC_b, 0,
-ESC_d, ESC_e,
ESC_f, 0,
@@ -328,7 +328,7 @@ because it is defined as 'a', which of course picks up the ASCII value. */
#endif
static const short int escapes[] = {
-/* 80 */ 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
+/* 80 */ ESC_a, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0,
/* 90 */ 0, 0, -ESC_k, 0, 0, ESC_n, 0, -ESC_p,
/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0,
diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h
index e2a9252..c6d1427 100644
--- a/src/pcre2_internal.h
+++ b/src/pcre2_internal.h
@@ -1192,31 +1192,6 @@ only. */
/* -------------------- Definitions for compiled patterns -------------------*/
-/* Escape items that are just an encoding of a particular data value. */
-
-#ifndef ESC_e
-#define ESC_e CHAR_ESC
-#endif
-
-#ifndef ESC_f
-#define ESC_f CHAR_FF
-#endif
-
-#ifndef ESC_n
-#define ESC_n CHAR_LF
-#endif
-
-#ifndef ESC_r
-#define ESC_r CHAR_CR
-#endif
-
-/* We can't officially use ESC_t because it is a POSIX reserved identifier
-(presumably because of all the others like size_t). */
-
-#ifndef ESC_tee
-#define ESC_tee CHAR_HT
-#endif
-
/* Codes for different types of Unicode property */
#define PT_ANY 0 /* Any property - matches all chars */
@@ -1255,13 +1230,46 @@ contain characters with values greater than 255. */
#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
+/* Escape items that are just an encoding of a particular data value. These
+appear in the escapes[] table in pcre2_compile.c as positive numbers. */
+
+#ifndef ESC_a
+#define ESC_a CHAR_BEL
+#endif
+
+#ifndef ESC_e
+#define ESC_e CHAR_ESC
+#endif
+
+#ifndef ESC_f
+#define ESC_f CHAR_FF
+#endif
+
+#ifndef ESC_n
+#define ESC_n CHAR_LF
+#endif
+
+#ifndef ESC_r
+#define ESC_r CHAR_CR
+#endif
+
+/* We can't officially use ESC_t because it is a POSIX reserved identifier
+(presumably because of all the others like size_t). */
+
+#ifndef ESC_tee
+#define ESC_tee CHAR_HT
+#endif
+
/* These are escaped items that aren't just an encoding of a particular data
value such as \n. They must have non-zero values, as check_escape() returns 0
-for a data character. Also, they must appear in the same order as in the
-opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
-corresponds to "." in DOTALL mode rather than an escape sequence. It is also
-used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In
-non-DOTALL mode, "." behaves like \N.
+for a data character. In the escapes[] table in pcre2_compile.c their values
+are negated in order to distinguish them from data values.
+
+They must appear here in the same order as in the opcode definitions below, up
+to ESC_z. There's a dummy for OP_ALLANY because it corresponds to "." in DOTALL
+mode rather than an escape sequence. It is also used for [^] in JavaScript
+compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves
+like \N.
The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
when PCRE2_UCP is set and replacement of \d etc by \p sequences is required.
diff --git a/src/pcre2test.c b/src/pcre2test.c
index 97ba5bb..1759a22 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -5181,9 +5181,9 @@ while ((c = *p++) != 0)
else switch ((c = *p++))
{
case '\\': break;
- case 'a': c = 7; break;
+ case 'a': c = CHAR_BEL; break;
case 'b': c = '\b'; break;
- case 'e': c = 27; break;
+ case 'e': c = CHAR_ESC; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;