From 5b0396473bbcd70756a09d887fb7436d6cd72dce Mon Sep 17 00:00:00 2001
From: Kazuki Yamaguchi
Date: Mon, 26 Sep 2022 16:39:53 +0900
Subject: Fix coderange calculation in String#b

Leave the new coderange unknown if the original encoding is not
ASCII-compatible.

Non-ASCII-compatible encoding strings with valid or broken coderange can
end up as ascii-only.

Fixes 9a8f6e392fbd ("Cheaply derive code range for String#b return
value", 2022-07-25).
---
 string.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

(limited to 'string.c')

diff --git a/string.c b/string.c
index c5711bcfe3..b455c8b7d9 100644
--- a/string.c
+++ b/string.c
@@ -10771,20 +10771,22 @@ rb_str_b(VALUE str)
     }
     str_replace_shared_without_enc(str2, str);
 
-    // BINARY strings can never be broken; they're either 7-bit ASCII or VALID.
-    // If we know the receiver's code range then we know the result's code range.
-    int cr = ENC_CODERANGE(str);
-    switch (cr) {
-      case ENC_CODERANGE_7BIT:
-        ENC_CODERANGE_SET(str2, ENC_CODERANGE_7BIT);
-        break;
-      case ENC_CODERANGE_BROKEN:
-      case ENC_CODERANGE_VALID:
-        ENC_CODERANGE_SET(str2, ENC_CODERANGE_VALID);
-        break;
-      default:
-        ENC_CODERANGE_CLEAR(str2);
-        break;
+    if (rb_enc_asciicompat(STR_ENC_GET(str))) {
+        // BINARY strings can never be broken; they're either 7-bit ASCII or VALID.
+        // If we know the receiver's code range then we know the result's code range.
+        int cr = ENC_CODERANGE(str);
+        switch (cr) {
+          case ENC_CODERANGE_7BIT:
+            ENC_CODERANGE_SET(str2, ENC_CODERANGE_7BIT);
+            break;
+          case ENC_CODERANGE_BROKEN:
+          case ENC_CODERANGE_VALID:
+            ENC_CODERANGE_SET(str2, ENC_CODERANGE_VALID);
+            break;
+          default:
+            ENC_CODERANGE_CLEAR(str2);
+            break;
+        }
     }
 
     return str2;
-- 
cgit v1.2.1