diff options
author | Jeremy Evans <code@jeremyevans.net> | 2021-02-26 12:14:48 -0800 |
---|---|---|
committer | Jeremy Evans <code@jeremyevans.net> | 2022-08-11 08:47:29 -0700 |
commit | 49517b3bb436456407e0ee099c7442f3ab5ac53d (patch) | |
tree | 4ad9277c468e5961883264082618a67b14484abe | |
parent | c361cf44c03275405989022054d7c20efcc2a2ce (diff) | |
download | ruby-49517b3bb436456407e0ee099c7442f3ab5ac53d.tar.gz |
Fix inspect for unicode codepoint 0x85
This is an inelegant hack that manually checks for this specific
code point in rb_str_inspect. Some testing indicates that this is
the only code point affected.
It's possible that a better fix would be in the lower-level encoding
code, such that rb_enc_isprint would return false instead of true for
code point 0x85.
Fixes [Bug #16842]
-rw-r--r-- | string.c | 10 | ||||
-rw-r--r-- | test/ruby/test_string.rb | 5 |
2 files changed, 14 insertions, 1 deletion
@@ -6777,7 +6777,15 @@ rb_str_inspect(VALUE str) prev = p; continue; } - if ((enc == resenc && rb_enc_isprint(c, enc)) || + /* The special casing of 0x85 (NEXT_LINE) here is because + * Oniguruma historically treats it as printable, but it + * doesn't match the print POSIX bracket class or character + * property in regexps. + * + * See Ruby Bug #16842 for details: + * https://bugs.ruby-lang.org/issues/16842 + */ + if ((enc == resenc && rb_enc_isprint(c, enc) && c != 0x85) || (asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) { continue; } diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index d37924dec1..ab14a3c17b 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -2614,6 +2614,11 @@ CODE assert_equal '"\x0012"', s.inspect, bug8290 end + def test_inspect_next_line + bug16842 = '[ruby-core:98231]' + assert_equal '"\\u0085"', 0x85.chr(Encoding::UTF_8).inspect, bug16842 + end + def test_partition assert_equal(%w(he l lo), S("hello").partition(/l/)) assert_equal(%w(he l lo), S("hello").partition("l")) |