From a9ae440827c27ee2cb7d815559fee40d336767c1 Mon Sep 17 00:00:00 2001 From: Lamont Granquist Date: Tue, 21 Apr 2015 18:42:24 -0700 Subject: emit token that failed utf-8 validation plus code cleanup of the c-extension --- ext/ffi_yajl/ext/encoder/encoder.c | 340 ++++++++++++++++++------------------- lib/ffi_yajl/encoder.rb | 6 +- lib/ffi_yajl/ffi/encoder.rb | 62 +++---- spec/ffi_yajl/encoder_spec.rb | 2 +- 4 files changed, 202 insertions(+), 208 deletions(-) diff --git a/ext/ffi_yajl/ext/encoder/encoder.c b/ext/ffi_yajl/ext/encoder/encoder.c index fdc6f56..49bd9ed 100644 --- a/ext/ffi_yajl/ext/encoder/encoder.c +++ b/ext/ffi_yajl/ext/encoder/encoder.c @@ -7,9 +7,6 @@ static VALUE cYajl_Gen; /* FIXME: the json gem does a whole bunch of indirection around monkeypatching... not sure if we need to as well... */ -#define CHECK_STATUS(call) \ - if ((status = (call)) != yajl_gen_status_ok) { rb_funcall(mEncoder2, rb_intern("raise_error_for_status"), 1, INT2FIX(status)); } - static VALUE mEncoder_do_yajl_encode(VALUE self, VALUE obj, VALUE yajl_gen_opts, VALUE json_opts) { ID sym_ffi_yajl = rb_intern("ffi_yajl"); VALUE sym_yajl_gen_beautify = ID2SYM(rb_intern("yajl_gen_beautify")); @@ -72,290 +69,285 @@ int rb_cHash_ffi_yajl_callback(VALUE key, VALUE val, VALUE extra) { return 0; } -static VALUE rb_cHash_ffi_yajl(VALUE self, VALUE rb_yajl_gen, VALUE state) { +#define RB_FUNC0(call) rb_funcall(self, rb_intern(call), 0) + +/* + * wrappers around yajl_gen_* functions + */ + +/* encode a c-string as a yajl string */ +VALUE gen_cstring(VALUE rb_yajl_gen, char *cptr, int len) { yajl_gen_status status; - VALUE extra; struct yajl_gen_t *yajl_gen; Data_Get_Struct(rb_yajl_gen, struct yajl_gen_t, yajl_gen); - if ( rb_hash_aref(state, rb_str_new2("processing_key")) == Qtrue ) { - ID sym_to_s = rb_intern("to_s"); - VALUE str = rb_funcall(self, sym_to_s, 0); - char *cptr = RSTRING_PTR(str); - int len = RSTRING_LEN(str); - - CHECK_STATUS( - yajl_gen_string(yajl_gen, (unsigned char *)cptr, len) - ); - } else { - extra = rb_hash_new(); /* FIXME: reduce garbage */ + if ((status = yajl_gen_string(yajl_gen, (unsigned char *)cptr, len)) != yajl_gen_status_ok) { + rb_funcall(mEncoder2, rb_intern("raise_error_for_status"), 2, INT2FIX(status), rb_str_new(cptr, len)); + } - rb_hash_aset(extra, rb_str_new2("yajl_gen"), rb_yajl_gen); + return Qnil; +} - rb_hash_aset(extra, rb_str_new2("state"), state); +/* encode a ruby-sring as a yajl string */ +VALUE gen_string(VALUE rb_yajl_gen, VALUE str) { + char *cptr = RSTRING_PTR(str); + int len = RSTRING_LEN(str); - CHECK_STATUS( - yajl_gen_map_open(yajl_gen) - ); - rb_hash_foreach(self, rb_cHash_ffi_yajl_callback, extra); - CHECK_STATUS( - yajl_gen_map_close(yajl_gen) - ); + return gen_cstring(rb_yajl_gen, cptr, len); +} + +/* calls #to_s on an object to encode it as a yajl string */ +static VALUE gen_string_to_s(VALUE rb_yajl_gen, VALUE self) { + return gen_string(rb_yajl_gen, RB_FUNC0("to_s")); +} + +/* encode a ruby string as a yajl number (also used to embed already-rendered json from #to_json) */ +VALUE gen_number(VALUE rb_yajl_gen, VALUE str) { + yajl_gen_status status; + struct yajl_gen_t *yajl_gen; + Data_Get_Struct(rb_yajl_gen, struct yajl_gen_t, yajl_gen); + char *cptr = RSTRING_PTR(str); + int len = RSTRING_LEN(str); + + if ((status = yajl_gen_number(yajl_gen, cptr, len)) != yajl_gen_status_ok) { + rb_funcall(mEncoder2, rb_intern("raise_error_for_status"), 2, INT2FIX(status), str); } return Qnil; } -static VALUE rb_cArray_ffi_yajl(VALUE self, VALUE rb_yajl_gen, VALUE state) { +/* encode hash open */ +VALUE gen_map_open(VALUE rb_yajl_gen) { yajl_gen_status status; - ID sym_ffi_yajl = rb_intern("ffi_yajl"); - long i; - VALUE val; struct yajl_gen_t *yajl_gen; Data_Get_Struct(rb_yajl_gen, struct yajl_gen_t, yajl_gen); - if ( rb_hash_aref(state, rb_str_new2("processing_key")) == Qtrue ) { - ID sym_to_s = rb_intern("to_s"); - VALUE str = rb_funcall(self, sym_to_s, 0); - char *cptr = RSTRING_PTR(str); - int len = RSTRING_LEN(str); - - CHECK_STATUS( - yajl_gen_string(yajl_gen, (unsigned char *)cptr, len) - ); - } else { - CHECK_STATUS( - yajl_gen_array_open(yajl_gen) - ); - for(i=0; i#to_ffi_yajl() method calls + */ + +static VALUE rb_cHash_ffi_yajl(VALUE self, VALUE rb_yajl_gen, VALUE state) { if ( rb_hash_aref(state, rb_str_new2("processing_key")) == Qtrue ) { - CHECK_STATUS( - yajl_gen_string(yajl_gen, (unsigned char *)cptr, len) - ); + gen_string(rb_yajl_gen, rb_funcall(self, rb_intern("to_s"), 0)); } else { - CHECK_STATUS( - yajl_gen_number(yajl_gen, cptr, len) - ); + + /* FIXME: i think this got refactored from something else and it is now pointless -- + should just pass rb_yajl_gen directly instead of this 'extra' hash -- i don't + *think* this indirection is doing anything useful to mark memory against the GC */ + + VALUE extra = rb_hash_new(); + + rb_hash_aset(extra, rb_str_new2("yajl_gen"), rb_yajl_gen); + + rb_hash_aset(extra, rb_str_new2("state"), state); + + gen_map_open(rb_yajl_gen); + + rb_hash_foreach(self, rb_cHash_ffi_yajl_callback, extra); + + gen_map_close(rb_yajl_gen); } return Qnil; } -static VALUE rb_cFloat_ffi_yajl(VALUE self, VALUE rb_yajl_gen, VALUE state) { - yajl_gen_status status; - ID sym_to_s = rb_intern("to_s"); - VALUE str = rb_funcall(self, sym_to_s, 0); - char *cptr = RSTRING_PTR(str); - int len = RSTRING_LEN(str); - struct yajl_gen_t *yajl_gen; - Data_Get_Struct(rb_yajl_gen, struct yajl_gen_t, yajl_gen); +static VALUE rb_cArray_ffi_yajl(VALUE self, VALUE rb_yajl_gen, VALUE state) { + if ( rb_hash_aref(state, rb_str_new2("processing_key")) == Qtrue ) { + gen_string(rb_yajl_gen, rb_funcall(self, rb_intern("to_s"), 0)); + } else { + VALUE val; + long i; + ID sym_ffi_yajl = rb_intern("ffi_yajl"); - if (memcmp(cptr, "NaN", 3) == 0 || memcmp(cptr, "Infinity", 8) == 0 || memcmp(cptr, "-Infinity", 9) == 0) { - rb_raise(cEncodeError, "'%s' is an invalid number", cptr); + gen_array_open(rb_yajl_gen); + + for(i=0; i :replace) case status when 1 # yajl_gen_keys_must_be_strings raise FFI_Yajl::EncodeError, "YAJL internal error: attempted use of non-string object as key" @@ -68,7 +70,7 @@ module FFI_Yajl when 6 # yajl_gen_no_buf raise FFI_Yajl::EncodeError, "YAJL internal error: yajl_gen_get_buf was called, but a print callback was specified, so no internal buffer is available" when 7 # yajl_gen_invalid_string - raise FFI_Yajl::EncodeError, "Invalid UTF-8 string: cannot encode to UTF-8" + raise FFI_Yajl::EncodeError, "Invalid UTF-8 string '#{token}': cannot encode to UTF-8" else raise FFI_Yajl::EncodeError, "Unknown YAJL Error (#{status}), please report this as a bug" end diff --git a/lib/ffi_yajl/ffi/encoder.rb b/lib/ffi_yajl/ffi/encoder.rb index 73a0d11..f2338e4 100644 --- a/lib/ffi_yajl/ffi/encoder.rb +++ b/lib/ffi_yajl/ffi/encoder.rb @@ -70,11 +70,11 @@ class Hash if state[:processing_key] str = self.to_s if ( status = FFI_Yajl.yajl_gen_string(yajl_gen, str, str.bytesize) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end else if ( status = FFI_Yajl.yajl_gen_map_open(yajl_gen) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, '{') end self.each do |key, value| # Perf Fix: mutate state hash rather than creating new copy @@ -84,7 +84,7 @@ class Hash value.ffi_yajl(yajl_gen, state) end if ( status = FFI_Yajl.yajl_gen_map_close(yajl_gen) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, '}') end end end @@ -95,17 +95,17 @@ class Array if state[:processing_key] str = self.to_s if ( status = FFI_Yajl.yajl_gen_string(yajl_gen, str, str.bytesize) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end else if ( status = FFI_Yajl.yajl_gen_array_open(yajl_gen) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, '[') end self.each do |value| value.ffi_yajl(yajl_gen, state) end if ( status = FFI_Yajl.yajl_gen_array_close(yajl_gen) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, ']') end end end @@ -113,14 +113,14 @@ end class NilClass def ffi_yajl(yajl_gen, state) + str = self.to_s if state[:processing_key] - str = self.to_s if ( status = FFI_Yajl.yajl_gen_string(yajl_gen, str, str.bytesize) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end else if ( status = FFI_Yajl.yajl_gen_null(yajl_gen) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end end end @@ -128,14 +128,14 @@ end class TrueClass def ffi_yajl(yajl_gen, state) + str = self.to_s if state[:processing_key] - str = self.to_s if ( status = FFI_Yajl.yajl_gen_string(yajl_gen, str, str.bytesize) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end else if ( status = FFI_Yajl.yajl_gen_bool(yajl_gen, 1) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end end end @@ -143,14 +143,14 @@ end class FalseClass def ffi_yajl(yajl_gen, state) + str = self.to_s if state[:processing_key] - str = self.to_s if ( status = FFI_Yajl.yajl_gen_string(yajl_gen, str, str.bytesize) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end else if ( status = FFI_Yajl.yajl_gen_bool(yajl_gen, 0) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end end end @@ -164,11 +164,11 @@ class Fixnum end if state[:processing_key] if ( status = FFI_Yajl.yajl_gen_string(yajl_gen, str, str.bytesize) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end else if ( status = FFI_Yajl.yajl_gen_integer(yajl_gen, self) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end end end @@ -182,11 +182,11 @@ class Bignum end if state[:processing_key] if ( status = FFI_Yajl.yajl_gen_string(yajl_gen, str, str.bytesize) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end else if ( status = FFI_Yajl.yajl_gen_number(yajl_gen, str, str.bytesize) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end end end @@ -200,11 +200,11 @@ class Float end if state[:processing_key] if ( status = FFI_Yajl.yajl_gen_string(yajl_gen, str, str.bytesize) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end else if ( status = FFI_Yajl.yajl_gen_number(yajl_gen, str, str.bytesize) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end end end @@ -214,7 +214,7 @@ class Symbol def ffi_yajl(yajl_gen, state) str = self.to_s if ( status = FFI_Yajl.yajl_gen_string(yajl_gen, str, str.bytesize) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end end end @@ -222,7 +222,7 @@ end class String def ffi_yajl(yajl_gen, state) if ( status = FFI_Yajl.yajl_gen_string(yajl_gen, self, self.bytesize) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, self) end end end @@ -231,7 +231,7 @@ class StringIO def ffi_yajl(yajl_gen, state) str = self.read if ( status = FFI_Yajl.yajl_gen_string(yajl_gen, str, str.bytesize) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end end end @@ -240,7 +240,7 @@ class Date def ffi_yajl(yajl_gen, state) str = self.to_s if ( status = FFI_Yajl.yajl_gen_string(yajl_gen, str, str.bytesize) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end end end @@ -249,16 +249,16 @@ class Time def ffi_yajl(yajl_gen, state) str = self.strftime "%Y-%m-%d %H:%M:%S %z" if ( status = FFI_Yajl.yajl_gen_string(yajl_gen, str, str.bytesize) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end end end -class DateTime +class DateTime < Date def ffi_yajl(yajl_gen, state) str = self.to_s if ( status = FFI_Yajl.yajl_gen_string(yajl_gen, str, str.bytesize) ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end end end @@ -267,15 +267,15 @@ end class Object def ffi_yajl(yajl_gen, state) if !state[:processing_key] && self.respond_to?(:to_json) - json = self.to_json(state[:json_opts]) + str = self.to_json(state[:json_opts]) # #yajl_gen_number outputs a string without quotes around it - status = FFI_Yajl.yajl_gen_number(yajl_gen, json, json.bytesize) + status = FFI_Yajl.yajl_gen_number(yajl_gen, str, str.bytesize) else str = self.to_s status = FFI_Yajl.yajl_gen_string(yajl_gen, str, str.bytesize) end if ( status ) != 0 - FFI_Yajl::Encoder.raise_error_for_status(status) + FFI_Yajl::Encoder.raise_error_for_status(status, str) end end end diff --git a/spec/ffi_yajl/encoder_spec.rb b/spec/ffi_yajl/encoder_spec.rb index 0eef23d..5691bf6 100644 --- a/spec/ffi_yajl/encoder_spec.rb +++ b/spec/ffi_yajl/encoder_spec.rb @@ -187,7 +187,7 @@ describe "FFI_Yajl::Encoder" do } it "raises an error on invalid json" do - expect{ encoder.encode(ruby) }.to raise_error(FFI_Yajl::EncodeError, "Invalid UTF-8 string: cannot encode to UTF-8") + expect{ encoder.encode(ruby) }.to raise_error(FFI_Yajl::EncodeError, /Invalid UTF-8 string 'Elan Ruusam.e': cannot encode to UTF-8/) end context "when validate_utf8 is off" do -- cgit v1.2.1