From 49517b3bb436456407e0ee099c7442f3ab5ac53d Mon Sep 17 00:00:00 2001
From: Jeremy Evans
Date: Fri, 26 Feb 2021 12:14:48 -0800
Subject: Fix inspect for unicode codepoint 0x85

This is an inelegant hack, by manually checking for this specific
code point in rb_str_inspect. Some testing indicates that this is
the only code point affected.

It's possible a better fix would be inside of lower-level encoding
code, such that rb_enc_isprint would return false and not true for
codepoint 0x85.

Fixes [Bug #16842]
---
 string.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'string.c')

diff --git a/string.c b/string.c
index 85819e26a3..e74783cf92 100644
--- a/string.c
+++ b/string.c
@@ -6777,7 +6777,15 @@ rb_str_inspect(VALUE str)
             prev = p;
             continue;
         }
-        if ((enc == resenc && rb_enc_isprint(c, enc)) ||
+        /* The special casing of 0x85 (NEXT_LINE) here is because
+         * Oniguruma historically treats it as printable, but it
+         * doesn't match the print POSIX bracket class or character
+         * property in regexps.
+         *
+         * See Ruby Bug #16842 for details:
+         * https://bugs.ruby-lang.org/issues/16842
+         */
+        if ((enc == resenc && rb_enc_isprint(c, enc) && c != 0x85) ||
             (asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) {
             continue;
         }
-- 
cgit v1.2.1