summaryrefslogtreecommitdiff
path: root/string.c
diff options
context:
space:
mode:
author: Jeremy Evans <code@jeremyevans.net> 2021-02-26 12:14:48 -0800
committer: Jeremy Evans <code@jeremyevans.net> 2022-08-11 08:47:29 -0700
commit: 49517b3bb436456407e0ee099c7442f3ab5ac53d (patch)
tree: 4ad9277c468e5961883264082618a67b14484abe /string.c
parent: c361cf44c03275405989022054d7c20efcc2a2ce (diff)
download: ruby-49517b3bb436456407e0ee099c7442f3ab5ac53d.tar.gz
Fix inspect for unicode codepoint 0x85
This is an inelegant hack: it manually checks for this specific code point in rb_str_inspect. Some testing indicates that this is the only code point affected. A better fix might belong in the lower-level encoding code, so that rb_enc_isprint returns false rather than true for codepoint 0x85. Fixes [Bug #16842]
Diffstat (limited to 'string.c')
-rw-r--r--string.c10
1 files changed, 9 insertions, 1 deletions
diff --git a/string.c b/string.c
index 85819e26a3..e74783cf92 100644
--- a/string.c
+++ b/string.c
@@ -6777,7 +6777,15 @@ rb_str_inspect(VALUE str)
prev = p;
continue;
}
- if ((enc == resenc && rb_enc_isprint(c, enc)) ||
+ /* The special casing of 0x85 (NEXT_LINE) here is because
+ * Oniguruma historically treats it as printable, but it
+ * doesn't match the print POSIX bracket class or character
+ * property in regexps.
+ *
+ * See Ruby Bug #16842 for details:
+ * https://bugs.ruby-lang.org/issues/16842
+ */
+ if ((enc == resenc && rb_enc_isprint(c, enc) && c != 0x85) ||
(asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) {
continue;
}