diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2013-04-19 17:50:38 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2013-04-19 17:50:38 +0000 |
commit | 394d5dfa9ba625c99a1e6a411f81b628bfebd60a (patch) | |
tree | 22a788ad3df769fa9c3b4f5f81efde5b1cad87a4 /test/ruby/test_m17n.rb | |
parent | 57ffc79c4561b9249ef6b81101f1887f16f29e19 (diff) | |
download | ruby-394d5dfa9ba625c99a1e6a411f81b628bfebd60a.tar.gz |
* string.c (str_scrub): add ruby method String#scrub which verify and
fix invalid byte sequence.
* string.c (str_compat_and_valid): check given string is compatible
and valid with given encoding.
* transcode.c (str_transcode0): If invalid: :replace is specified for
String#encode, replace invalid byte sequence even if the destination
encoding equals to the source encoding.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@40390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'test/ruby/test_m17n.rb')
-rw-r--r-- | test/ruby/test_m17n.rb | 34 |
1 files changed, 34 insertions, 0 deletions
diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index a8d56a4a56..60834bb9c6 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -1489,4 +1489,38 @@ class TestM17N < Test::Unit::TestCase s.untrust assert_equal(true, s.b.untrusted?) end + + def test_scrub + assert_equal("\uFFFD\uFFFD\uFFFD", u("\x80\x80\x80").scrub) + assert_equal("\uFFFDA", u("\xF4\x80\x80A").scrub) + + # exapmles in Unicode 6.1.0 D93b + assert_equal("\x41\uFFFD\uFFFD\x41\uFFFD\x41", + u("\x41\xC0\xAF\x41\xF4\x80\x80\x41").scrub) + assert_equal("\x41\uFFFD\uFFFD\uFFFD\x41", + u("\x41\xE0\x9F\x80\x41").scrub) + assert_equal("\u0061\uFFFD\uFFFD\uFFFD\u0062\uFFFD\u0063\uFFFD\uFFFD\u0064", + u("\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64").scrub) + assert_equal("abcdefghijklmnopqrstuvwxyz\u0061\uFFFD\uFFFD\uFFFD\u0062\uFFFD\u0063\uFFFD\uFFFD\u0064", + u("abcdefghijklmnopqrstuvwxyz\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64").scrub) + + assert_equal("\u3042\u3013", u("\xE3\x81\x82\xE3\x81").scrub("\u3013")) + assert_raise(Encoding::CompatibilityError){ u("\xE3\x81\x82\xE3\x81").scrub(e("\xA4\xA2")) } + assert_raise(TypeError){ u("\xE3\x81\x82\xE3\x81").scrub(1) } + assert_raise(ArgumentError){ u("\xE3\x81\x82\xE3\x81\x82\xE3\x81").scrub(u("\x81")) } + assert_equal(e("\xA4\xA2\xA2\xAE"), e("\xA4\xA2\xA4").scrub(e("\xA2\xAE"))) + + assert_equal("\u3042<e381>", u("\xE3\x81\x82\xE3\x81").scrub{|x|'<'+x.unpack('H*')[0]+'>'}) + assert_raise(Encoding::CompatibilityError){ u("\xE3\x81\x82\xE3\x81").scrub{e("\xA4\xA2")} } + assert_raise(TypeError){ u("\xE3\x81\x82\xE3\x81").scrub{1} } + assert_raise(ArgumentError){ u("\xE3\x81\x82\xE3\x81\x82\xE3\x81").scrub{u("\x81")} } + assert_equal(e("\xA4\xA2\xA2\xAE"), e("\xA4\xA2\xA4").scrub{e("\xA2\xAE")}) + + assert_equal("\uFFFD\u3042".encode("UTF-16BE"), + "\xD8\x00\x30\x42".force_encoding(Encoding::UTF_16BE). + scrub) + assert_equal("\uFFFD\u3042".encode("UTF-16LE"), + "\x00\xD8\x42\x30".force_encoding(Encoding::UTF_16LE). + scrub) + end end |