summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jeremy Evans <code@jeremyevans.net> 2021-02-26 12:14:48 -0800
committer Jeremy Evans <code@jeremyevans.net> 2022-08-11 08:47:29 -0700
commit 49517b3bb436456407e0ee099c7442f3ab5ac53d (patch)
tree 4ad9277c468e5961883264082618a67b14484abe
parent c361cf44c03275405989022054d7c20efcc2a2ce (diff)
download ruby-49517b3bb436456407e0ee099c7442f3ab5ac53d.tar.gz
Fix inspect for unicode codepoint 0x85
This is an inelegant hack that manually checks for this specific code point in rb_str_inspect. Some testing indicates that this is the only code point affected. A better fix might belong in the lower-level encoding code, so that rb_enc_isprint returns false rather than true for code point 0x85. Fixes [Bug #16842]
-rw-r--r-- string.c 10
-rw-r--r-- test/ruby/test_string.rb 5
2 files changed, 14 insertions, 1 deletions
diff --git a/string.c b/string.c
index 85819e26a3..e74783cf92 100644
--- a/string.c
+++ b/string.c
@@ -6777,7 +6777,15 @@ rb_str_inspect(VALUE str)
prev = p;
continue;
}
- if ((enc == resenc && rb_enc_isprint(c, enc)) ||
+ /* The special casing of 0x85 (NEXT_LINE) here is because
+ * Oniguruma historically treats it as printable, but it
+ * doesn't match the print POSIX bracket class or character
+ * property in regexps.
+ *
+ * See Ruby Bug #16842 for details:
+ * https://bugs.ruby-lang.org/issues/16842
+ */
+ if ((enc == resenc && rb_enc_isprint(c, enc) && c != 0x85) ||
(asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) {
continue;
}
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index d37924dec1..ab14a3c17b 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2614,6 +2614,11 @@ CODE
assert_equal '"\x0012"', s.inspect, bug8290
end
+ def test_inspect_next_line
+ bug16842 = '[ruby-core:98231]'
+ assert_equal '"\\u0085"', 0x85.chr(Encoding::UTF_8).inspect, bug16842
+ end
+
def test_partition
assert_equal(%w(he l lo), S("hello").partition(/l/))
assert_equal(%w(he l lo), S("hello").partition("l"))