summary | refs | log | tree | commit | diff
path: root/string.c
diff options
context:
space:
mode:
author: Kazuki Yamaguchi <k@rhe.jp> 2022-09-26 16:39:53 +0900
committer: Kazuki Yamaguchi <k@rhe.jp> 2022-09-26 16:44:46 +0900
commit: 5b0396473bbcd70756a09d887fb7436d6cd72dce (patch)
tree: bf33032d7dad0c028d9b1781bc6018e2f7e99e04 /string.c
parent: 1c14e406d3c4a4c660f66f0d1c642d1ed2aabed2 (diff)
download: ruby-5b0396473bbcd70756a09d887fb7436d6cd72dce.tar.gz
Fix coderange calculation in String#b
Leave the new string's coderange unknown if the original encoding is not ASCII-compatible. A string in a non-ASCII-compatible encoding whose coderange is valid or broken can turn out to be ASCII-only once its bytes are reinterpreted as binary, so the result must not be unconditionally marked as valid. Fixes 9a8f6e392fbd ("Cheaply derive code range for String#b return value", 2022-07-25).
Diffstat (limited to 'string.c')
-rw-r--r-- string.c 30
1 files changed, 16 insertions, 14 deletions
diff --git a/string.c b/string.c
index c5711bcfe3..b455c8b7d9 100644
--- a/string.c
+++ b/string.c
@@ -10771,20 +10771,22 @@ rb_str_b(VALUE str)
}
str_replace_shared_without_enc(str2, str);
- // BINARY strings can never be broken; they're either 7-bit ASCII or VALID.
- // If we know the receiver's code range then we know the result's code range.
- int cr = ENC_CODERANGE(str);
- switch (cr) {
- case ENC_CODERANGE_7BIT:
- ENC_CODERANGE_SET(str2, ENC_CODERANGE_7BIT);
- break;
- case ENC_CODERANGE_BROKEN:
- case ENC_CODERANGE_VALID:
- ENC_CODERANGE_SET(str2, ENC_CODERANGE_VALID);
- break;
- default:
- ENC_CODERANGE_CLEAR(str2);
- break;
+ if (rb_enc_asciicompat(STR_ENC_GET(str))) {
+ // BINARY strings can never be broken; they're either 7-bit ASCII or VALID.
+ // If we know the receiver's code range then we know the result's code range.
+ int cr = ENC_CODERANGE(str);
+ switch (cr) {
+ case ENC_CODERANGE_7BIT:
+ ENC_CODERANGE_SET(str2, ENC_CODERANGE_7BIT);
+ break;
+ case ENC_CODERANGE_BROKEN:
+ case ENC_CODERANGE_VALID:
+ ENC_CODERANGE_SET(str2, ENC_CODERANGE_VALID);
+ break;
+ default:
+ ENC_CODERANGE_CLEAR(str2);
+ break;
+ }
}
return str2;