From 15c6ab5602ea580010e439de272f95c9aae6b577 Mon Sep 17 00:00:00 2001 From: Lamont Granquist Date: Tue, 5 Jan 2016 12:34:04 -0800 Subject: fix validate_utf8: false encoding coercion the string we get back from ffi is tagged as ascii encoded by default, so we must force encode it first. then we don't want to convert from binary to utf-8 since that will wind up mangling all the utf-8 characters, we want to convert from utf-8 to utf-8 while replacing invalid characters. --- lib/ffi_yajl/encoder.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/ffi_yajl/encoder.rb b/lib/ffi_yajl/encoder.rb index f4dd8c8..4c3c570 100644 --- a/lib/ffi_yajl/encoder.rb +++ b/lib/ffi_yajl/encoder.rb @@ -41,7 +41,7 @@ module FFI_Yajl # call either the ext or ffi hook str = do_yajl_encode(obj, yajl_gen_opts, opts) # we can skip cleaning the whole string for utf-8 issues if we have yajl validate as we go - str.encode!("utf-8", "binary", undef: :replace) unless yajl_gen_opts[:yajl_gen_validate_utf8] + str.force_encoding("UTF-8").encode!("UTF-8", undef: :replace, invalid: :replace) unless yajl_gen_opts[:yajl_gen_validate_utf8] str end @@ -56,7 +56,7 @@ module FFI_Yajl def self.raise_error_for_status(status, token = nil) # scrub token to valid utf-8 since we may be issuing an exception on an invalid utf-8 token - token = token.to_s.encode("utf-8", "binary", undef: :replace) + token = token.to_s.force_encoding("UTF-8").encode("utf-8", undef: :replace, invalid: :replace) case status when 1 # yajl_gen_keys_must_be_strings raise FFI_Yajl::EncodeError, "YAJL internal error: attempted use of non-string object as key" -- cgit v1.2.1 From f5809701af8d4f3b52452ea1c943dc97b9a65da6 Mon Sep 17 00:00:00 2001 From: Lamont Granquist Date: Tue, 5 Jan 2016 13:40:08 -0800 Subject: use new ruby #scrub method where available --- lib/ffi_yajl/encoder.rb | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/ffi_yajl/encoder.rb b/lib/ffi_yajl/encoder.rb index 4c3c570..14e3801 100644 --- a/lib/ffi_yajl/encoder.rb +++ b/lib/ffi_yajl/encoder.rb @@ -41,7 +41,15 @@ module FFI_Yajl # call either the ext or ffi hook str = do_yajl_encode(obj, yajl_gen_opts, opts) # we can skip cleaning the whole string for utf-8 issues if we have yajl validate as we go - str.force_encoding("UTF-8").encode!("UTF-8", undef: :replace, invalid: :replace) unless yajl_gen_opts[:yajl_gen_validate_utf8] + + str.force_encoding("UTF-8") + unless yajl_gen_opts[:yajl_gen_validate_utf8] + if str.respond_to?(:scrub) + str.scrub! + else + str.encode!("UTF-8", 'binary', undef: :replace, invalid: :replace) + end + end str end @@ -56,7 +64,12 @@ module FFI_Yajl def self.raise_error_for_status(status, token = nil) # scrub token to valid utf-8 since we may be issuing an exception on an invalid utf-8 token - token = token.to_s.force_encoding("UTF-8").encode("utf-8", undef: :replace, invalid: :replace) + token = token.to_s.force_encoding("UTF-8") + if token.respond_to?(:scrub) + token.scrub! + else + token.encode("utf-8", 'binary', undef: :replace, invalid: :replace) + end case status when 1 # yajl_gen_keys_must_be_strings raise FFI_Yajl::EncodeError, "YAJL internal error: attempted use of non-string object as key" -- cgit v1.2.1 From 13b7548f0662b9d402ce0805fa452b28cde1d475 Mon Sep 17 00:00:00 2001 From: Lamont Granquist Date: Tue, 5 Jan 2016 13:46:49 -0800 Subject: trying to fix 1.9/2.0 --- lib/ffi_yajl/encoder.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/ffi_yajl/encoder.rb b/lib/ffi_yajl/encoder.rb index 14e3801..59450b1 100644 --- a/lib/ffi_yajl/encoder.rb +++ b/lib/ffi_yajl/encoder.rb @@ -47,7 +47,7 @@ module FFI_Yajl if str.respond_to?(:scrub) str.scrub! else - str.encode!("UTF-8", 'binary', undef: :replace, invalid: :replace) + str.encode!("UTF-8", undef: :replace, invalid: :replace) end end str @@ -68,7 +68,7 @@ module FFI_Yajl if token.respond_to?(:scrub) token.scrub! else - token.encode("utf-8", 'binary', undef: :replace, invalid: :replace) + token.encode("utf-8", undef: :replace, invalid: :replace) end case status when 1 # yajl_gen_keys_must_be_strings -- cgit v1.2.1 From c96a19984e27171ed75d3f76025cb8576b27f203 Mon Sep 17 00:00:00 2001 From: Lamont Granquist Date: Tue, 5 Jan 2016 13:53:25 -0800 Subject: fixes for 1.9 and 2.0 this is kind of shitty code, but once 2.0 and 1.9 are dropped and we can use #scrub then all the shitty code can go away... --- lib/ffi_yajl/encoder.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/ffi_yajl/encoder.rb b/lib/ffi_yajl/encoder.rb index 59450b1..3b07e95 100644 --- a/lib/ffi_yajl/encoder.rb +++ b/lib/ffi_yajl/encoder.rb @@ -47,7 +47,7 @@ module FFI_Yajl if str.respond_to?(:scrub) str.scrub! else - str.encode!("UTF-8", undef: :replace, invalid: :replace) + str.encode!("UTF-16le", undef: :replace, invalid: :replace).encode!('UTF-8') end end str @@ -68,7 +68,7 @@ module FFI_Yajl if token.respond_to?(:scrub) token.scrub! else - token.encode("utf-8", undef: :replace, invalid: :replace) + token.encode!("UTF-16le", undef: :replace, invalid: :replace).encode!('UTF-8') end case status when 1 # yajl_gen_keys_must_be_strings -- cgit v1.2.1