diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2010-03-21 12:36:14 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2010-03-21 12:36:14 +0000 |
commit | 83c2f60b88c73337bcb2fa86228974bddb95f2c8 (patch) | |
tree | 9127577263feb943ceb2ab3292ddb15c7e5eebd0 | |
parent | 66390013a1eb01c7159412e673965093f05f8161 (diff) | |
download | ruby-83c2f60b88c73337bcb2fa86228974bddb95f2c8.tar.gz |
* lib/uri/common.rb (URI.encode_www_form_component):
convert strings in HTML5 ASCII-incompatible encodings
(UTF-7, UTF-16BE/LE, UTF-32BE/LE) to UTF-8 before percent-encoding.
* lib/uri/common.rb (URI.encode_www_form_component):
the Shift_JIS string "\x83\x41" should be encoded as "%83A":
the string is escaped byte by byte, so the trail byte 0x41 ("A") is left as is.
This matches existing implementations.
* lib/uri/common.rb (URI.decode_www_form_component):
pass the encoding given by the caller to force_encoding
instead of always using UTF-8. [ruby-dev:40721]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27001 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 13 | ||||
-rw-r--r-- | lib/uri/common.rb | 41 | ||||
-rw-r--r-- | test/uri/test_common.rb | 10 |
3 files changed, 34 insertions, 30 deletions
@@ -1,3 +1,16 @@ +Sun Mar 21 00:46:29 2010 NARUSE, Yui <naruse@ruby-lang.org> + + * lib/uri/common.rb (URI.encode_www_form_component): + convert strings of HTML5 ASCII incompatible encoding + to UTF-8. + + * lib/uri/common.rb (URI.encode_www_form_component): + "\x83\x41" of Shift_JIS should be encoded as "%83A". + This follows real implementations. + + * lib/uri/common.rb (URI.decode_www_form_component): + use given encoding for force_encoding. [ruby-dev:40721] + Sun Mar 21 21:09:17 2010 Tanaka Akira <akr@fsij.org> * lib/resolv-replace.rb: specify super class for rdoc. diff --git a/lib/uri/common.rb b/lib/uri/common.rb index d9aa15a4c3..5d0d95fb3f 100644 --- a/lib/uri/common.rb +++ b/lib/uri/common.rb @@ -722,6 +722,10 @@ module URI # :nodoc: TBLDECWWWCOMP_ = {} + # :nodoc: + HTML5ASCIIINCOMPAT = [Encoding::UTF_7, Encoding::UTF_16BE, Encoding::UTF_16LE, + Encoding::UTF_32BE, Encoding::UTF_32LE] + # Encode given +str+ to URL-encoded form data. # # This doesn't convert *, -, ., 0-9, A-Z, _, a-z, @@ -733,35 +737,19 @@ module URI def self.encode_www_form_component(str) if TBLENCWWWCOMP_.empty? 256.times do |i| - case i - when 0x20 - TBLENCWWWCOMP_[' '] = '+' - # when 0x2A, 0x2D, 0x2E, 0x30..0x39, 0x41..0x5A, 0x5F, 0x61..0x7A - else - TBLENCWWWCOMP_[i.chr] = '%%%02X' % i - end + TBLENCWWWCOMP_[i.chr] = '%%%02X' % i end + TBLENCWWWCOMP_[' '] = '+' TBLENCWWWCOMP_.freeze end str = str.to_s - case str.encoding - when Encoding::ASCII_8BIT, Encoding::US_ASCII, Encoding::UTF_8 - str = str.dup.force_encoding(Encoding::ASCII_8BIT) - str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_) - when Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE - reg = Regexp.new('[^*\-.0-9A-Z_a-z]+'.encode(str.encoding)) - str = str.gsub(reg){ - $&.force_encoding(Encoding::ASCII_8BIT).gsub(/./, TBLENCWWWCOMP_). - force_encoding(str.encoding) - } + if HTML5ASCIIINCOMPAT.include?(str.encoding) + str = str.encode(Encoding::UTF_8) else - if str.encoding.ascii_compatible? 
- str = str.gsub(/[^*\-.0-9A-Z_a-z]+/){ - $&.force_encoding(Encoding::ASCII_8BIT).gsub(/./, TBLENCWWWCOMP_)} - else - str = str.force_encoding(Encoding::ASCII_8BIT).gsub(/./, TBLENCWWWCOMP_) - end + str = str.dup end + str.force_encoding(Encoding::ASCII_8BIT) + str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_) str.force_encoding(Encoding::US_ASCII) end @@ -778,11 +766,11 @@ module URI TBLDECWWWCOMP_['%%%x%X' % [h, l]] = i.chr TBLDECWWWCOMP_['%%%X%x' % [h, l]] = i.chr TBLDECWWWCOMP_['%%%x%x' % [h, l]] = i.chr - TBLDECWWWCOMP_['+'] = ' ' if i == 0x20 end + TBLDECWWWCOMP_['+'] = ' ' TBLDECWWWCOMP_.freeze end - str.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(Encoding::UTF_8) + str.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc) end # Generate URL-encoded form data from given +enum+. @@ -794,7 +782,8 @@ module URI # # This doesn't convert encodings of give items, so convert them before call # this method if you want to send data as other than original encoding or - # mixed encoding data. + # mixed encoding data. (strings which is encoded in HTML5 ASCII incompatible + # encoding is converted to UTF-8) # # This doesn't treat files. When you send a file, use multipart/form-data. 
# diff --git a/test/uri/test_common.rb b/test/uri/test_common.rb index 730210e8b3..9f39e843d3 100644 --- a/test/uri/test_common.rb +++ b/test/uri/test_common.rb @@ -54,12 +54,12 @@ class TestCommon < Test::Unit::TestCase assert_equal("%00+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \ "AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E", URI.encode_www_form_component("\x00 !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~")) - assert_equal("%95%41", URI.encode_www_form_component( + assert_equal("%95A", URI.encode_www_form_component( "\x95\x41".force_encoding(Encoding::Shift_JIS))) - assert_equal("%30%42", URI.encode_www_form_component( + assert_equal("%E3%81%82", URI.encode_www_form_component( "\x30\x42".force_encoding(Encoding::UTF_16BE))) - assert_equal("%30%42", URI.encode_www_form_component( - "\x30\x42".force_encoding(Encoding::ISO_2022_JP))) + assert_equal("%1B%24B%24%22%1B%28B", URI.encode_www_form_component( + "\e$B$\"\e(B".force_encoding(Encoding::ISO_2022_JP))) end def test_decode_www_form_component @@ -67,6 +67,8 @@ class TestCommon < Test::Unit::TestCase URI.decode_www_form_component( "%20+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \ "AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E")) + assert_equal("\xA1\xA2".force_encoding(Encoding::EUC_JP), + URI.decode_www_form_component("%A1%A2", "EUC-JP")) end def test_encode_www_form |