From 49517b3bb436456407e0ee099c7442f3ab5ac53d Mon Sep 17 00:00:00 2001
From: Jeremy Evans <code@jeremyevans.net>
Date: Fri, 26 Feb 2021 12:14:48 -0800
Subject: Fix inspect for unicode codepoint 0x85

This is an inelegant hack that manually checks for this specific
code point in rb_str_inspect.  Some testing indicates that this is
the only code point affected.

A better fix might belong in the lower-level encoding code, so
that rb_enc_isprint returns false rather than true for code
point 0x85.

Fixes [Bug #16842]
---
 string.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'string.c')

diff --git a/string.c b/string.c
index 85819e26a3..e74783cf92 100644
--- a/string.c
+++ b/string.c
@@ -6777,7 +6777,15 @@ rb_str_inspect(VALUE str)
             prev = p;
             continue;
         }
-        if ((enc == resenc && rb_enc_isprint(c, enc)) ||
+        /* The special casing of 0x85 (NEXT_LINE) here is because
+         * Oniguruma historically treats it as printable, but it
+         * doesn't match the print POSIX bracket class or character
+         * property in regexps.
+         *
+         * See Ruby Bug #16842 for details:
+         * https://bugs.ruby-lang.org/issues/16842
+         */
+        if ((enc == resenc && rb_enc_isprint(c, enc) && c != 0x85) ||
             (asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) {
             continue;
         }
-- 
cgit v1.2.1