summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/pcre2_convert.c108
-rw-r--r--testdata/testinput242
-rw-r--r--testdata/testoutput2434
3 files changed, 90 insertions, 54 deletions
diff --git a/src/pcre2_convert.c b/src/pcre2_convert.c
index ea99556..645f6fb 100644
--- a/src/pcre2_convert.c
+++ b/src/pcre2_convert.c
@@ -471,8 +471,8 @@ Arguments:
pattern_end end of pattern
out output context
-Returns: TRUE => success
- FALSE => malformed class
+Returns: >0 => class index
+ 0 => malformed class
*/
static int
@@ -481,48 +481,31 @@ convert_glob_parse_class(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
{
static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:"
"graph:lower:print:punct:space:upper:word:xdigit:";
-PCRE2_SPTR pattern = *from;
-PCRE2_SPTR start;
+PCRE2_SPTR start = *from + 1;
+PCRE2_SPTR pattern = start;
const char *class_ptr;
PCRE2_UCHAR c;
-
-out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
-out->out_str[1] = CHAR_COLON;
-convert_glob_write_str(out, 2);
+int class_index;
while (TRUE)
{
- if (pattern >= pattern_end)
- {
- *from = pattern;
- return ERROR_MISSING_SQUARE_BRACKET;
- }
+ if (pattern >= pattern_end) return 0;
c = *pattern++;
- if (c == CHAR_COLON && pattern < pattern_end &&
- *pattern == CHAR_RIGHT_SQUARE_BRACKET)
- {
- break;
- }
-
- if (c < CHAR_a || c > CHAR_z)
- {
- /* All POSIX class is composed of lowercase characters */
- *from = pattern;
- return ERROR_MISSING_SQUARE_BRACKET;
- }
-
- convert_glob_write(out, c);
+ if (c < CHAR_a || c > CHAR_z) break;
}
-start = *from;
-*from = pattern + 1;
+if (c != CHAR_COLON || pattern >= pattern_end ||
+ *pattern != CHAR_RIGHT_SQUARE_BRACKET)
+ return 0;
+
class_ptr = posix_classes;
+class_index = 0;
while (TRUE)
{
- if (*class_ptr == CHAR_NULL) return ERROR_UNKNOWN_POSIX_CLASS;
+ if (*class_ptr == CHAR_NULL) return 0;
pattern = start;
@@ -530,10 +513,13 @@ while (TRUE)
{
if (*pattern == CHAR_COLON)
{
- out->out_str[0] = CHAR_COLON;
- out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET;
- convert_glob_write_str(out, 2);
- return 0;
+ pattern += 2;
+ start -= 2;
+
+ do convert_glob_write(out, *start++); while (start < pattern);
+
+ *from = pattern;
+ return class_index;
}
pattern++;
class_ptr++;
@@ -541,9 +527,41 @@ while (TRUE)
while (*class_ptr != CHAR_COLON) class_ptr++;
class_ptr++;
+ class_index++;
}
}
+/* Checks whether the character is in the class.
+
+Arguments:
+ class_index class index
+ c character
+
+Returns: !0 => character is found in the class
+ 0 => otherwise
+*/
+
+static BOOL
+convert_glob_char_in_class(int class_index, PCRE2_UCHAR c)
+{
+switch (class_index)
+ {
+ case 0: return isalnum(c);
+ case 1: return isalpha(c);
+ case 2: return 1;
+ case 3: return c == CHAR_HT || c == CHAR_SPACE;
+ case 4: return iscntrl(c);
+ case 5: return isdigit(c);
+ case 6: return isgraph(c);
+ case 7: return islower(c);
+ case 8: return isprint(c);
+ case 9: return ispunct(c);
+ case 10: return isspace(c);
+ case 11: return isupper(c);
+ case 12: return isalnum(c) || c == CHAR_UNDERSCORE;
+ default: return isxdigit(c);
+ }
+}
/* Parse a range of characters.
@@ -569,7 +587,7 @@ BOOL has_prev_c;
PCRE2_SPTR pattern = *from;
PCRE2_SPTR char_start = NULL;
uint32_t c, prev_c;
-int result, len;
+int len, class_index;
(void)utf; /* Avoid compiler warning. */
@@ -653,17 +671,21 @@ while (pattern < pattern_end)
if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
{
- *from = pattern + 1;
+ *from = pattern;
+ class_index = convert_glob_parse_class(from, pattern_end, out);
- result = convert_glob_parse_class(from, pattern_end, out);
- if (result != 0) return result;
+ if (class_index != 0)
+ {
+ pattern = *from;
- pattern = *from;
+ has_prev_c = FALSE;
+ prev_c = 0;
- has_prev_c = FALSE;
- prev_c = 0;
- separator_seen = TRUE;
- continue;
+ if (!is_negative &&
+ convert_glob_char_in_class (class_index, separator))
+ separator_seen = TRUE;
+ continue;
+ }
}
else if (c == CHAR_MINUS && has_prev_c &&
*pattern != CHAR_RIGHT_SQUARE_BRACKET)
diff --git a/testdata/testinput24 b/testdata/testinput24
index 69fa366..f25c04b 100644
--- a/testdata/testinput24
+++ b/testdata/testinput24
@@ -237,6 +237,8 @@
/[[:alpha:]-a]/
+/[[:alpha:]][[:punct:]][[:ascii:]]/
+
/[a-[:alpha:]]/
/[[:alpha:/
diff --git a/testdata/testoutput24 b/testdata/testoutput24
index dd0f1bc..622c0c6 100644
--- a/testdata/testoutput24
+++ b/testdata/testoutput24
@@ -179,12 +179,12 @@ No match
No match
'[[:alpha:]][[:digit:]][[:upper:]]'
-(?s)\A[[:alpha:]](?<!/)[[:digit:]](?<!/)[[:upper:]](?<!/)\z
+(?s)\A[[:alpha:]][[:digit:]][[:upper:]]\z
a1B
0: a1B
'[[:digit:][:upper:][:space:]]'
-(?s)\A[[:digit:][:upper:][:space:]](?<!/)\z
+(?s)\A[[:digit:][:upper:][:space:]]\z
A
0: A
1
@@ -198,7 +198,7 @@ No match
No match
'[a-c[:digit:]x-z]'
-(?s)\A[a-c[:digit:]x-z](?<!/)\z
+(?s)\A[a-c[:digit:]x-z]\z
5
0: 5
b
@@ -294,7 +294,7 @@ No match
0: <->
/a[[:digit:].]z/
-(?s)\Aa[[:digit:].](?<!/)z\z
+(?s)\Aa[[:digit:].]z\z
a1z
0: a1z
a.z
@@ -304,20 +304,29 @@ No match
No match
/a[[:digit].]z/
-** Pattern conversion error at offset 10: missing terminating ] for character class
+(?s)\Aa[\[:digit]\.\]z\z
a[.]z
+ 0: a[.]z
a:.]z
+ 0: a:.]z
ad.]z
+ 0: ad.]z
/<[[:a[:digit:]b]>/
-** Pattern conversion error at offset 6: missing terminating ] for character class
+(?s)\A<[\[:a[:digit:]b]>\z
<[>
+ 0: <[>
<:>
+ 0: <:>
<a>
+ 0: <a>
<9>
+ 0: <9>
<b>
+ 0: <b>
\= Expect no match
<d>
+No match
/a*b/convert_glob_separator=\
(?s)\Aa(*COMMIT)[^\\]*?b\z
@@ -349,7 +358,7 @@ No match
(?s)\A[^/a\\bc][^/\]][^/\-][^/\]\-]\z
/[[:alpha:][:xdigit:][:word:]]/
-(?s)\A[[:alpha:][:xdigit:][:word:]](?<!/)\z
+(?s)\A[[:alpha:][:xdigit:][:word:]]\z
"[/-/]"
(?s)\A[/-/](?<!/)\z
@@ -364,7 +373,10 @@ No match
(?s)\A[^/\--\-\--\-]\z
/[[:alpha:]-a]/
-(?s)\A[[:alpha:]\-a](?<!/)\z
+(?s)\A[[:alpha:]\-a]\z
+
+/[[:alpha:]][[:punct:]][[:ascii:]]/
+(?s)\A[[:alpha:]][[:punct:]](?<!/)[[:ascii:]](?<!/)\z
/[a-[:alpha:]]/
** Pattern conversion error at offset 4: invalid syntax
@@ -376,13 +388,13 @@ No match
** Pattern conversion error at offset 10: missing terminating ] for character class
/[[:alphaa:]]/
-** Pattern conversion error at offset 11: unknown POSIX class name
+(?s)\A[\[:alphaa:]\]\z
/[[:xdigi:]]/
-** Pattern conversion error at offset 10: unknown POSIX class name
+(?s)\A[\[:xdigi:]\]\z
/[[:xdigit::]]/
-** Pattern conversion error at offset 10: missing terminating ] for character class
+(?s)\A[\[:xdigit::]\]\z
/****/
(?s)