summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nobuyoshi Nakada <nobu@ruby-lang.org> 2022-08-30 18:12:08 +0900
committer Nobuyoshi Nakada <nobu@ruby-lang.org> 2022-08-31 17:27:59 +0900
commit 576bdec03f0d58847690a0607c788ada433ce60f (patch)
tree da8277b325bfa1c033c2572662a6e781b589ae0f
parent 9dc60653db186b1ae9400ed75b413a07728ce6ff (diff)
download ruby-576bdec03f0d58847690a0607c788ada433ce60f.tar.gz
[Bug #18973] Promote US-ASCII to ASCII-8BIT when adding 8-bit char
-rw-r--r-- internal/string.h 1
-rw-r--r-- sprintf.c 13
-rw-r--r-- string.c 32
-rw-r--r-- test/ruby/test_sprintf.rb 3
4 files changed, 37 insertions, 12 deletions
diff --git a/internal/string.h b/internal/string.h
index 8fb9553d03..46862d77f5 100644
--- a/internal/string.h
+++ b/internal/string.h
@@ -43,6 +43,7 @@ char *rb_str_to_cstr(VALUE str);
const char *ruby_escaped_char(int c);
void rb_str_make_independent(VALUE str);
int rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc);
+int rb_ascii8bit_appendable_encoding_index(rb_encoding *enc, unsigned int code);
static inline bool STR_EMBED_P(VALUE str);
static inline bool STR_SHARED_P(VALUE str);
diff --git a/sprintf.c b/sprintf.c
index b2bdd4a072..bfe25e1d3c 100644
--- a/sprintf.c
+++ b/sprintf.c
@@ -454,13 +454,18 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt)
str = tmp;
goto format_s1;
}
- else {
- n = NUM2INT(val);
- if (n >= 0) n = rb_enc_codelen((c = n), enc);
- }
+ n = NUM2INT(val);
+ if (n >= 0) n = rb_enc_codelen((c = n), enc);
if (n <= 0) {
rb_raise(rb_eArgError, "invalid character");
}
+ int encidx = rb_ascii8bit_appendable_encoding_index(enc, c);
+ if (encidx >= 0 && encidx != rb_enc_to_index(enc)) {
+ /* special case */
+ rb_enc_associate_index(result, encidx);
+ enc = rb_enc_from_index(encidx);
+ coderange = ENC_CODERANGE_VALID;
+ }
if (!(flags & FWIDTH)) {
CHECK(n);
rb_enc_mbcput(c, &buf[blen], enc);
diff --git a/string.c b/string.c
index 564812ae51..951aeca6dd 100644
--- a/string.c
+++ b/string.c
@@ -3481,17 +3481,13 @@ rb_str_concat(VALUE str1, VALUE str2)
return rb_str_append(str1, str2);
}
- encidx = rb_enc_to_index(enc);
- if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
- /* US-ASCII automatically extended to ASCII-8BIT */
+ encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
+ if (encidx >= 0) {
char buf[1];
buf[0] = (char)code;
- if (code > 0xFF) {
- rb_raise(rb_eRangeError, "%u out of char range", code);
- }
rb_str_cat(str1, buf, 1);
- if (encidx == ENCINDEX_US_ASCII && code > 127) {
- rb_enc_associate_index(str1, ENCINDEX_ASCII_8BIT);
+ if (encidx != rb_enc_to_index(enc)) {
+ rb_enc_associate_index(str1, encidx);
ENC_CODERANGE_SET(str1, ENC_CODERANGE_VALID);
}
}
@@ -3524,6 +3520,26 @@ rb_str_concat(VALUE str1, VALUE str2)
return str1;
}
+int
+rb_ascii8bit_appendable_encoding_index(rb_encoding *enc, unsigned int code)
+{
+ int encidx = rb_enc_to_index(enc);
+
+ if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
+ /* US-ASCII automatically extended to ASCII-8BIT */
+ if (code > 0xFF) {
+ rb_raise(rb_eRangeError, "%u out of char range", code);
+ }
+ if (encidx == ENCINDEX_US_ASCII && code > 127) {
+ return ENCINDEX_ASCII_8BIT;
+ }
+ return encidx;
+ }
+ else {
+ return -1;
+ }
+}
+
/*
* call-seq:
* prepend(*other_strings) -> string
diff --git a/test/ruby/test_sprintf.rb b/test/ruby/test_sprintf.rb
index 803399fdb3..c453ecd350 100644
--- a/test/ruby/test_sprintf.rb
+++ b/test/ruby/test_sprintf.rb
@@ -369,6 +369,9 @@ class TestSprintf < Test::Unit::TestCase
assert_equal(" " * BSIZ + "a", sprintf("%#{ BSIZ + 1 }c", ?a))
assert_equal("a" + " " * BSIZ, sprintf("%-#{ BSIZ + 1 }c", ?a))
assert_raise(ArgumentError) { sprintf("%c", -1) }
+ s = sprintf("%c".encode(Encoding::US_ASCII), 0x80)
+ assert_equal("\x80".b, s)
+ assert_predicate(s, :valid_encoding?)
end
def test_string