summary refs log tree commit diff
diff options
context:
space:
mode:
author Lamont Granquist <lamont@scriptkiddie.org> 2016-01-06 08:03:39 -0800
committer Lamont Granquist <lamont@scriptkiddie.org> 2016-01-06 08:03:39 -0800
commit 609bf6dc2aad653c790c3f641272d93bb36590d6 (patch)
tree eb9fc75d4b783d7b8825022651923b5aed70ba30
parent e5294ab56f26ccb35338032f656ac2e7362b61f1 (diff)
parent c96a19984e27171ed75d3f76025cb8576b27f203 (diff)
download ffi-yajl-609bf6dc2aad653c790c3f641272d93bb36590d6.tar.gz
Merge pull request #75 from chef/lcg/fix-force-encoding
fix validate_utf8: false encoding coercion
-rw-r--r-- lib/ffi_yajl/encoder.rb 17
-rw-r--r-- spec/ffi_yajl/encoder_spec.rb 15
2 files changed, 29 insertions, 3 deletions
diff --git a/lib/ffi_yajl/encoder.rb b/lib/ffi_yajl/encoder.rb
index f4dd8c8..3b07e95 100644
--- a/lib/ffi_yajl/encoder.rb
+++ b/lib/ffi_yajl/encoder.rb
@@ -41,7 +41,15 @@ module FFI_Yajl
# call either the ext or ffi hook
str = do_yajl_encode(obj, yajl_gen_opts, opts)
# we can skip cleaning the whole string for utf-8 issues if we have yajl validate as we go
- str.encode!("utf-8", "binary", undef: :replace) unless yajl_gen_opts[:yajl_gen_validate_utf8]
+
+ str.force_encoding("UTF-8")
+ unless yajl_gen_opts[:yajl_gen_validate_utf8]
+ if str.respond_to?(:scrub)
+ str.scrub!
+ else
+ str.encode!("UTF-16le", undef: :replace, invalid: :replace).encode!('UTF-8')
+ end
+ end
str
end
@@ -56,7 +64,12 @@ module FFI_Yajl
def self.raise_error_for_status(status, token = nil)
# scrub token to valid utf-8 since we may be issuing an exception on an invalid utf-8 token
- token = token.to_s.encode("utf-8", "binary", undef: :replace)
+ token = token.to_s.force_encoding("UTF-8")
+ if token.respond_to?(:scrub)
+ token.scrub!
+ else
+ token.encode!("UTF-16le", undef: :replace, invalid: :replace).encode!('UTF-8')
+ end
case status
when 1 # yajl_gen_keys_must_be_strings
raise FFI_Yajl::EncodeError, "YAJL internal error: attempted use of non-string object as key"
diff --git a/spec/ffi_yajl/encoder_spec.rb b/spec/ffi_yajl/encoder_spec.rb
index 91009f4..724277b 100644
--- a/spec/ffi_yajl/encoder_spec.rb
+++ b/spec/ffi_yajl/encoder_spec.rb
@@ -180,13 +180,14 @@ describe "FFI_Yajl::Encoder" do
"passwd" => {
"root" => { "dir" => "/root", "gid" => 0, "uid" => 0, "shell" => "/bin/sh", "gecos" => "Elan Ruusam\xc3\xa4e" },
"glen" => { "dir" => "/home/glen", "gid" => 500, "uid" => 500, "shell" => "/bin/bash", "gecos" => "Elan Ruusam\xE4e" },
+ "helmüt" => { "dir" => "/home/helmüt", "gid" => 500, "uid" => 500, "shell" => "/bin/bash", "gecos" => "Hañs Helmüt" },
},
},
},
}
it "raises an error on invalid json" do
- expect { encoder.encode(ruby) }.to raise_error(FFI_Yajl::EncodeError, /Invalid UTF-8 string 'Elan Ruusam.e': cannot encode to UTF-8/)
+ expect { encoder.encode(ruby) }.to raise_error(FFI_Yajl::EncodeError, /Invalid UTF-8 string 'Elan Ruusam.*': cannot encode to UTF-8/)
end
context "when validate_utf8 is off" do
@@ -203,6 +204,18 @@ describe "FFI_Yajl::Encoder" do
it "returns valid utf8" do
expect( encoder.encode(ruby).valid_encoding? ).to be true
end
+
+ it "does not mangle valid utf8" do
+ json = encoder.encode(ruby)
+ expect(json).to match(/Hañs Helmüt/)
+ end
+
+ it "does not grow after a round trip" do
+ json = encoder.encode(ruby)
+ ruby2 = FFI_Yajl::Parser.parse(json)
+ json2 = encoder.encode(ruby2)
+ expect(json).to eql(json2)
+ end
end
end
end