summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- re.c 153
-rw-r--r-- test/ruby/test_regexp.rb 56
2 files changed, 176 insertions, 33 deletions
diff --git a/re.c b/re.c
index b4bade1089..a53493b0d6 100644
--- a/re.c
+++ b/re.c
@@ -2801,14 +2801,18 @@ unescape_unicode_bmp(const char **pp, const char *end,
}
static int
-unescape_nonascii(const char *p, const char *end, rb_encoding *enc,
+unescape_nonascii0(const char **pp, const char *end, rb_encoding *enc,
VALUE buf, rb_encoding **encp, int *has_property,
- onig_errmsg_buffer err, int options)
+ onig_errmsg_buffer err, int options, int recurse)
{
+ const char *p = *pp;
unsigned char c;
char smallbuf[2];
int in_char_class = 0;
+ int parens = 1; /* ignored unless recurse is true */
+ int extended_mode = options & ONIG_OPTION_EXTEND;
+begin_scan:
while (p < end) {
int chlen = rb_enc_precise_mbclen(p, end, enc);
if (!MBCLEN_CHARFOUND_P(chlen)) {
@@ -2920,7 +2924,7 @@ escape_asis:
break;
case '#':
- if ((options & ONIG_OPTION_EXTEND) && !in_char_class) {
+ if (extended_mode && !in_char_class) {
/* consume and ignore comment in extended regexp */
while ((p < end) && ((c = *p++) != '\n'));
break;
@@ -2937,51 +2941,134 @@ escape_asis:
}
rb_str_buf_cat(buf, (char *)&c, 1);
break;
+ case ')':
+ rb_str_buf_cat(buf, (char *)&c, 1);
+ if (!in_char_class && recurse) {
+ if (--parens == 0) {
+ *pp = p;
+ return 0;
+ }
+ }
+ break;
case '(':
- if (!in_char_class && p + 1 < end && *p == '?' && *(p+1) == '#') {
- /* (?# is comment inside any regexp, and content inside should be ignored */
- const char *orig_p = p;
- int cont = 1;
-
- while (cont && (p < end)) {
- switch (c = *p++) {
- default:
- if (!(c & 0x80)) break;
- --p;
- /* fallthrough */
- case '\\':
- chlen = rb_enc_precise_mbclen(p, end, enc);
- if (!MBCLEN_CHARFOUND_P(chlen)) {
- goto invalid_multibyte;
+ if (!in_char_class && p + 1 < end && *p == '?') {
+ if (*(p+1) == '#') {
+ /* (?# is comment inside any regexp, and content inside should be ignored */
+ const char *orig_p = p;
+ int cont = 1;
+
+ while (cont && (p < end)) {
+ switch (c = *p++) {
+ default:
+ if (!(c & 0x80)) break;
+ --p;
+ /* fallthrough */
+ case '\\':
+ chlen = rb_enc_precise_mbclen(p, end, enc);
+ if (!MBCLEN_CHARFOUND_P(chlen)) {
+ goto invalid_multibyte;
+ }
+ p += MBCLEN_CHARFOUND_LEN(chlen);
+ break;
+ case ')':
+ cont = 0;
+ break;
}
- p += MBCLEN_CHARFOUND_LEN(chlen);
- break;
- case ')':
- cont = 0;
- break;
}
- }
- if (cont) {
- /* unterminated (?#, rewind so it is syntax error */
- p = orig_p;
- c = '(';
- rb_str_buf_cat(buf, (char *)&c, 1);
+ if (cont) {
+ /* unterminated (?#, rewind so it is syntax error */
+ p = orig_p;
+ c = '(';
+ rb_str_buf_cat(buf, (char *)&c, 1);
+ }
+ break;
+ } else {
+ /* potential change of extended option */
+ int invert = 0;
+ int local_extend = 0;
+ const char *s;
+
+ if (recurse) {
+ parens++;
+ }
+
+ for(s = p+1; s < end; s++) {
+ switch(*s) {
+ case 'x':
+ local_extend = invert ? -1 : 1;
+ break;
+ case '-':
+ invert = 1;
+ break;
+ case ':':
+ case ')':
+ if (local_extend == 0 ||
+ (local_extend == -1 && !extended_mode) ||
+ (local_extend == 1 && extended_mode)) {
+ /* no changes to extended flag */
+ goto fallthrough;
+ }
+
+ if (*s == ':') {
+ /* change extended flag until ')' */
+ int local_options = options;
+ if (local_extend == 1) {
+ local_options |= ONIG_OPTION_EXTEND;
+ } else {
+ local_options &= ~ONIG_OPTION_EXTEND;
+ }
+
+ rb_str_buf_cat(buf, (char *)&c, 1);
+ int ret = unescape_nonascii0(&p, end, enc, buf, encp,
+ has_property, err,
+ local_options, 1);
+ if (ret < 0) return ret;
+ goto begin_scan;
+ } else {
+ /* change extended flag for rest of expression */
+ extended_mode = local_extend == 1;
+ goto fallthrough;
+ }
+ case 'i':
+ case 'm':
+ case 'a':
+ case 'd':
+ case 'u':
+ /* other option flags, ignored during scanning */
+ break;
+ default:
+ /* other character, no extended flag change*/
+ goto fallthrough;
+ }
+ }
}
+ } else if (!in_char_class && recurse) {
+ parens++;
}
- else {
- rb_str_buf_cat(buf, (char *)&c, 1);
- }
- break;
+ /* FALLTHROUGH */
default:
+fallthrough:
rb_str_buf_cat(buf, (char *)&c, 1);
break;
}
}
+ if (recurse) {
+ *pp = p;
+ }
return 0;
}
+static int
+unescape_nonascii(const char *p, const char *end, rb_encoding *enc,
+ VALUE buf, rb_encoding **encp, int *has_property,
+ onig_errmsg_buffer err, int options)
+{
+ return unescape_nonascii0(&p, end, enc, buf, encp, has_property,
+ err, options, 0);
+}
+
static VALUE
rb_reg_preprocess(const char *p, const char *end, rb_encoding *enc,
rb_encoding **fixed_enc, onig_errmsg_buffer err, int options)
diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb
index 98bf41d2f1..c871580aeb 100644
--- a/test/ruby/test_regexp.rb
+++ b/test/ruby/test_regexp.rb
@@ -144,6 +144,62 @@ class TestRegexp < Test::Unit::TestCase
assert_raise(SyntaxError) {eval "/# \\users/"}
end
+ def test_nonextended_section_of_extended_regexp_bug_19379
+ assert_separately([], <<-'RUBY')
+ re = /(?-x:#)/x
+ assert_match(re, '#')
+ assert_not_match(re, '-')
+
+ re = /(?xi:#
+ y)/
+ assert_match(re, 'Y')
+ assert_not_match(re, '-')
+
+ re = /(?mix:#
+ y)/
+ assert_match(re, 'Y')
+ assert_not_match(re, '-')
+
+ re = /(?x-im:#
+ y)/i
+ assert_match(re, 'y')
+ assert_not_match(re, 'Y')
+
+ re = /(?-imx:(?xim:#
+ y))/x
+ assert_match(re, 'y')
+ assert_not_match(re, '-')
+
+ re = /(?x)#
+ y/
+ assert_match(re, 'y')
+ assert_not_match(re, 'Y')
+
+ re = /(?mx-i)#
+ y/i
+ assert_match(re, 'y')
+ assert_not_match(re, 'Y')
+
+ re = /(?-imx:(?xim:#
+ (?-x)y#))/x
+ assert_match(re, 'Y#')
+ assert_not_match(re, '-#')
+
+ re = /(?imx:#
+ (?-xim:#(?im)#(?x)#
+ )#
+ (?x)#
+ y)/
+ assert_match(re, '###Y')
+ assert_not_match(re, '###-')
+
+ re = %r{#c-\w+/comment/[\w-]+}
+ re = %r{https?://[^/]+#{re}}x
+ assert_match(re, 'http://foo#c-x/comment/bar')
+ assert_not_match(re, 'http://foo#cx/comment/bar')
+ RUBY
+ end
+
def test_union
assert_equal :ok, begin
Regexp.union(