diff options
author | Jeremy Evans <code@jeremyevans.net> | 2023-03-24 11:53:53 -0700 |
---|---|---|
committer | Jeremy Evans <code@jeremyevans.net> | 2023-04-23 19:27:58 -0700 |
commit | a8ba1ddd78544b4bda749051d44f7b2a8a0ec5ff (patch) | |
tree | fa7d0c86004486f857ecc04d3830ae1e1713c1bc /re.c | |
parent | ec211ad54d5ab8fef5031b8938028acaa1c5002a (diff) | |
download | ruby-a8ba1ddd78544b4bda749051d44f7b2a8a0ec5ff.tar.gz |
Use UTF-8 encoding for literal extended regexps with UTF-8 characters in comments
Fixes [Bug #19455]
Diffstat (limited to 're.c')
-rw-r--r-- | re.c | 9 |
1 file changed, 8 insertions, 1 deletion
```diff
@@ -2948,7 +2948,11 @@ escape_asis:
           case '#':
             if (extended_mode && !in_char_class) {
                 /* consume and ignore comment in extended regexp */
-                while ((p < end) && ((c = *p++) != '\n'));
+                while ((p < end) && ((c = *p++) != '\n')) {
+                    if ((c & 0x80) && !*encp && enc == rb_utf8_encoding()) {
+                        *encp = enc;
+                    }
+                }
                 break;
             }
             rb_str_buf_cat(buf, (char *)&c, 1);
@@ -2983,6 +2987,9 @@ escape_asis:
                 switch (c = *p++) {
                   default:
                     if (!(c & 0x80)) break;
+                    if (!*encp && enc == rb_utf8_encoding()) {
+                        *encp = enc;
+                    }
                     --p;
                     /* fallthrough */
                   case '\\':
```