summary | refs | log | tree | commit | diff
path: root/re.c
diff options
context:
space:
mode:
author: Jeremy Evans <code@jeremyevans.net> 2023-03-24 11:53:53 -0700
committer: Jeremy Evans <code@jeremyevans.net> 2023-04-23 19:27:58 -0700
commit: a8ba1ddd78544b4bda749051d44f7b2a8a0ec5ff (patch)
tree: fa7d0c86004486f857ecc04d3830ae1e1713c1bc /re.c
parent: ec211ad54d5ab8fef5031b8938028acaa1c5002a (diff)
download: ruby-a8ba1ddd78544b4bda749051d44f7b2a8a0ec5ff.tar.gz
Use UTF-8 encoding for literal extended regexps with UTF-8 characters in comments
Fixes [Bug #19455]
Diffstat (limited to 're.c')
-rw-r--r-- re.c 9
1 file changed, 8 insertions, 1 deletion
diff --git a/re.c b/re.c
index d7490bbbbf..f6abf46131 100644
--- a/re.c
+++ b/re.c
@@ -2948,7 +2948,11 @@ escape_asis:
case '#':
if (extended_mode && !in_char_class) {
/* consume and ignore comment in extended regexp */
- while ((p < end) && ((c = *p++) != '\n'));
+ while ((p < end) && ((c = *p++) != '\n')) {
+ if ((c & 0x80) && !*encp && enc == rb_utf8_encoding()) {
+ *encp = enc;
+ }
+ }
break;
}
rb_str_buf_cat(buf, (char *)&c, 1);
@@ -2983,6 +2987,9 @@ escape_asis:
switch (c = *p++) {
default:
if (!(c & 0x80)) break;
+ if (!*encp && enc == rb_utf8_encoding()) {
+ *encp = enc;
+ }
--p;
/* fallthrough */
case '\\':