diff options
author | Lamont Granquist <lamont@scriptkiddie.org> | 2016-01-06 08:03:39 -0800 |
---|---|---|
committer | Lamont Granquist <lamont@scriptkiddie.org> | 2016-01-06 08:03:39 -0800 |
commit | 609bf6dc2aad653c790c3f641272d93bb36590d6 (patch) | |
tree | eb9fc75d4b783d7b8825022651923b5aed70ba30 | |
parent | e5294ab56f26ccb35338032f656ac2e7362b61f1 (diff) | |
parent | c96a19984e27171ed75d3f76025cb8576b27f203 (diff) | |
download | ffi-yajl-609bf6dc2aad653c790c3f641272d93bb36590d6.tar.gz |
Merge pull request #75 from chef/lcg/fix-force-encoding
fix validate_utf8: false encoding coercion
-rw-r--r-- | lib/ffi_yajl/encoder.rb | 17 | ||||
-rw-r--r-- | spec/ffi_yajl/encoder_spec.rb | 15 |
2 files changed, 29 insertions, 3 deletions
```diff
diff --git a/lib/ffi_yajl/encoder.rb b/lib/ffi_yajl/encoder.rb
index f4dd8c8..3b07e95 100644
--- a/lib/ffi_yajl/encoder.rb
+++ b/lib/ffi_yajl/encoder.rb
@@ -41,7 +41,15 @@ module FFI_Yajl
       # call either the ext or ffi hook
       str = do_yajl_encode(obj, yajl_gen_opts, opts)
       # we can skip cleaning the whole string for utf-8 issues if we have yajl validate as we go
-      str.encode!("utf-8", "binary", undef: :replace) unless yajl_gen_opts[:yajl_gen_validate_utf8]
+
+      str.force_encoding("UTF-8")
+      unless yajl_gen_opts[:yajl_gen_validate_utf8]
+        if str.respond_to?(:scrub)
+          str.scrub!
+        else
+          str.encode!("UTF-16le", undef: :replace, invalid: :replace).encode!('UTF-8')
+        end
+      end
       str
     end

@@ -56,7 +64,12 @@ module FFI_Yajl

     def self.raise_error_for_status(status, token = nil)
       # scrub token to valid utf-8 since we may be issuing an exception on an invalid utf-8 token
-      token = token.to_s.encode("utf-8", "binary", undef: :replace)
+      token = token.to_s.force_encoding("UTF-8")
+      if token.respond_to?(:scrub)
+        token.scrub!
+      else
+        token.encode!("UTF-16le", undef: :replace, invalid: :replace).encode!('UTF-8')
+      end
       case status
       when 1 # yajl_gen_keys_must_be_strings
         raise FFI_Yajl::EncodeError, "YAJL internal error: attempted use of non-string object as key"
diff --git a/spec/ffi_yajl/encoder_spec.rb b/spec/ffi_yajl/encoder_spec.rb
index 91009f4..724277b 100644
--- a/spec/ffi_yajl/encoder_spec.rb
+++ b/spec/ffi_yajl/encoder_spec.rb
@@ -180,13 +180,14 @@ describe "FFI_Yajl::Encoder" do
           "passwd" => {
             "root" => { "dir" => "/root", "gid" => 0, "uid" => 0, "shell" => "/bin/sh", "gecos" => "Elan Ruusam\xc3\xa4e" },
             "glen" => { "dir" => "/home/glen", "gid" => 500, "uid" => 500, "shell" => "/bin/bash", "gecos" => "Elan Ruusam\xE4e" },
+            "helmüt" => { "dir" => "/home/helmüt", "gid" => 500, "uid" => 500, "shell" => "/bin/bash", "gecos" => "Hañs Helmüt" },
           },
         },
       },
     }

     it "raises an error on invalid json" do
-      expect { encoder.encode(ruby) }.to raise_error(FFI_Yajl::EncodeError, /Invalid UTF-8 string 'Elan Ruusam.e': cannot encode to UTF-8/)
+      expect { encoder.encode(ruby) }.to raise_error(FFI_Yajl::EncodeError, /Invalid UTF-8 string 'Elan Ruusam.*': cannot encode to UTF-8/)
     end

     context "when validate_utf8 is off" do
@@ -203,6 +204,18 @@ describe "FFI_Yajl::Encoder" do
       it "returns valid utf8" do
         expect( encoder.encode(ruby).valid_encoding? ).to be true
       end
+
+      it "does not mangle valid utf8" do
+        json = encoder.encode(ruby)
+        expect(json).to match(/Hañs Helmüt/)
+      end
+
+      it "does not grow after a round trip" do
+        json = encoder.encode(ruby)
+        ruby2 = FFI_Yajl::Parser.parse(json)
+        json2 = encoder.encode(ruby2)
+        expect(json).to eql(json2)
+      end
     end
   end
 end
```