diff options
-rw-r--r-- lib/ffi_yajl/encoder.rb       | 17 +++++++++++++++--
-rw-r--r-- spec/ffi_yajl/encoder_spec.rb | 15 ++++++++++++++-
2 files changed, 29 insertions, 3 deletions
diff --git a/lib/ffi_yajl/encoder.rb b/lib/ffi_yajl/encoder.rb index f4dd8c8..3b07e95 100644 --- a/lib/ffi_yajl/encoder.rb +++ b/lib/ffi_yajl/encoder.rb @@ -41,7 +41,15 @@ module FFI_Yajl # call either the ext or ffi hook str = do_yajl_encode(obj, yajl_gen_opts, opts) # we can skip cleaning the whole string for utf-8 issues if we have yajl validate as we go - str.encode!("utf-8", "binary", undef: :replace) unless yajl_gen_opts[:yajl_gen_validate_utf8] + + str.force_encoding("UTF-8") + unless yajl_gen_opts[:yajl_gen_validate_utf8] + if str.respond_to?(:scrub) + str.scrub! + else + str.encode!("UTF-16le", undef: :replace, invalid: :replace).encode!('UTF-8') + end + end str end @@ -56,7 +64,12 @@ module FFI_Yajl def self.raise_error_for_status(status, token = nil) # scrub token to valid utf-8 since we may be issuing an exception on an invalid utf-8 token - token = token.to_s.encode("utf-8", "binary", undef: :replace) + token = token.to_s.force_encoding("UTF-8") + if token.respond_to?(:scrub) + token.scrub! 
+ else + token.encode!("UTF-16le", undef: :replace, invalid: :replace).encode!('UTF-8') + end case status when 1 # yajl_gen_keys_must_be_strings raise FFI_Yajl::EncodeError, "YAJL internal error: attempted use of non-string object as key" diff --git a/spec/ffi_yajl/encoder_spec.rb b/spec/ffi_yajl/encoder_spec.rb index 91009f4..724277b 100644 --- a/spec/ffi_yajl/encoder_spec.rb +++ b/spec/ffi_yajl/encoder_spec.rb @@ -180,13 +180,14 @@ describe "FFI_Yajl::Encoder" do "passwd" => { "root" => { "dir" => "/root", "gid" => 0, "uid" => 0, "shell" => "/bin/sh", "gecos" => "Elan Ruusam\xc3\xa4e" }, "glen" => { "dir" => "/home/glen", "gid" => 500, "uid" => 500, "shell" => "/bin/bash", "gecos" => "Elan Ruusam\xE4e" }, + "helmüt" => { "dir" => "/home/helmüt", "gid" => 500, "uid" => 500, "shell" => "/bin/bash", "gecos" => "Hañs Helmüt" }, }, }, }, } it "raises an error on invalid json" do - expect { encoder.encode(ruby) }.to raise_error(FFI_Yajl::EncodeError, /Invalid UTF-8 string 'Elan Ruusam.e': cannot encode to UTF-8/) + expect { encoder.encode(ruby) }.to raise_error(FFI_Yajl::EncodeError, /Invalid UTF-8 string 'Elan Ruusam.*': cannot encode to UTF-8/) end context "when validate_utf8 is off" do @@ -203,6 +204,18 @@ describe "FFI_Yajl::Encoder" do it "returns valid utf8" do expect( encoder.encode(ruby).valid_encoding? ).to be true end + + it "does not mangle valid utf8" do + json = encoder.encode(ruby) + expect(json).to match(/Hañs Helmüt/) + end + + it "does not grow after a round trip" do + json = encoder.encode(ruby) + ruby2 = FFI_Yajl::Parser.parse(json) + json2 = encoder.encode(ruby2) + expect(json).to eql(json2) + end end end end |