summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2015-09-16 05:50:00 +0000
committer: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2015-09-16 05:50:00 +0000
commit: 965d8241c1ac3556685a833206d644bd6c7dc646 (patch)
tree: 0cd1be11831d65517b08c912815a33d230f2948e
parent: 22190b3f32010075c1dacd6d9cbec3fa488a6187 (diff)
download: ruby-965d8241c1ac3556685a833206d644bd6c7dc646.tar.gz
string.c: keep coderange
* string.c (rb_str_setbyte): keep the code range as much as possible.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@51873 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--ChangeLog4
-rw-r--r--string.c45
-rw-r--r--test/ruby/test_m17n.rb25
3 files changed, 72 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index 7613b25796..2bf1fa6a30 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+Wed Sep 16 14:49:58 2015 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * string.c (rb_str_setbyte): keep the code range as possible.
+
Wed Sep 16 13:23:48 2015 NAKAMURA Usaku <usa@ruby-lang.org>
* doc/syntax/literals.rdoc (Strings): mention about ?a literal.
diff --git a/string.c b/string.c
index 9eab86398b..fd86b8bec4 100644
--- a/string.c
+++ b/string.c
@@ -4702,15 +4702,56 @@ rb_str_setbyte(VALUE str, VALUE index, VALUE value)
long pos = NUM2LONG(index);
int byte = NUM2INT(value);
long len = RSTRING_LEN(str);
+ char *head, *ptr, *left = 0;
+ rb_encoding *enc;
+ int cr = ENC_CODERANGE_UNKNOWN, width, nlen;
if (pos < -len || len <= pos)
rb_raise(rb_eIndexError, "index %ld out of string", pos);
if (pos < 0)
pos += len;
- rb_str_modify(str);
+ if (!str_independent(str))
+ str_make_independent(str);
+ enc = STR_ENC_GET(str);
+ head = RSTRING_PTR(str);
+ ptr = &head[pos];
+ if (len > RSTRING_EMBED_LEN_MAX) {
+ cr = ENC_CODERANGE(str);
+ switch (cr) {
+ case ENC_CODERANGE_7BIT:
+ left = ptr;
+ width = 1;
+ break;
+ case ENC_CODERANGE_VALID:
+ left = rb_enc_left_char_head(head, ptr, head+len, enc);
+ width = rb_enc_precise_mbclen(left, head+len, enc);
+ break;
+ default:
+ ENC_CODERANGE_CLEAR(str);
+ }
+ }
+ else {
+ ENC_CODERANGE_CLEAR(str);
+ }
+
+ *ptr = byte;
- RSTRING_PTR(str)[pos] = byte;
+ switch (cr) {
+ case ENC_CODERANGE_7BIT:
+ if (ISASCII(byte)) break;
+ case ENC_CODERANGE_VALID:
+ nlen = rb_enc_precise_mbclen(left, head+len, enc);
+ if (!MBCLEN_CHARFOUND_P(nlen))
+ ENC_CODERANGE_SET(str, ENC_CODERANGE_BROKEN);
+ else if (cr == ENC_CODERANGE_7BIT)
+ ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
+ else if (MBCLEN_CHARFOUND_LEN(nlen) != width)
+ ENC_CODERANGE_CLEAR(str);
+ else if (ISASCII(byte)) /* may become 7BIT */
+ ENC_CODERANGE_CLEAR(str);
+ break;
+ }
return value;
}
diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb
index 6b89ad572d..ca25f8502d 100644
--- a/test/ruby/test_m17n.rb
+++ b/test/ruby/test_m17n.rb
@@ -1488,6 +1488,31 @@ class TestM17N < Test::Unit::TestCase
s = u("\xE3\x81\x82\xE3\x81\x84")
s.setbyte(-4, 0x84)
assert_equal(u("\xE3\x81\x84\xE3\x81\x84"), s)
+
+ x = "x" * 100
+ t = nil
+ failure = proc {"#{i}: #{encdump(t)}"}
+
+ s = "\u{3042 3044}"
+ s.bytesize.times {|i|
+ t = s + x
+ t.setbyte(i, t.getbyte(i)+1)
+ assert_predicate(t, :valid_encoding?, failure)
+ assert_not_predicate(t, :ascii_only?, failure)
+ t = s + x
+ t.setbyte(i, 0x20)
+ assert_not_predicate(t, :valid_encoding?, failure)
+ }
+
+ s = "\u{41 42 43}"
+ s.bytesize.times {|i|
+ t = s + x
+ t.setbyte(i, 0x20)
+ assert_predicate(t, :valid_encoding?, failure)
+ assert_predicate(t, :ascii_only?, failure)
+ t.setbyte(i, 0xe3)
+ assert_not_predicate(t, :valid_encoding?, failure)
+ }
end
def test_compatible