From b219eed7bbae2840e45fd3325cecf0fb415f4327 Mon Sep 17 00:00:00 2001 From: Florian Frank Date: Thu, 1 Oct 2009 10:57:48 +0200 Subject: different default in fast_generate fast_generate now defaults to throwing an exception if an infinite or nan float is encountered. This is the same behaviour as in generate and pretty_generate. (Fix for http://github.com/flori/json/issues#issue/3) --- CHANGES | 2 ++ ext/json/ext/generator/generator.c | 4 ++-- lib/json/pure/generator.rb | 4 ++-- tests/test_json_generate.rb | 6 +++--- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/CHANGES b/CHANGES index 9d80469..cb67705 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,5 @@ +2009-10-01 (1.2.0) + * fast_generate now raises an exeception for nan and infinite floats. 2009-08-23 (1.1.9) * Added forgotten main doc file extra_rdoc_files. 2009-08-23 (1.1.8) diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index c6c63c1..9bcd580 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -315,14 +315,14 @@ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self) rb_scan_args(argc, argv, "01*", &Vstate, &rest); if (!NIL_P(Vstate)) Data_Get_Struct(Vstate, JSON_Generator_State, state); if (isinf(value)) { - if (!state || state->allow_nan) { + if (state && state->allow_nan) { result = rb_funcall(self, i_to_s, 0); } else { tmp = rb_funcall(self, i_to_s, 0); rb_raise(eGeneratorError, "%u: %s not allowed in JSON", __LINE__, StringValueCStr(tmp)); } } else if (isnan(value)) { - if (!state || state->allow_nan) { + if (state && state->allow_nan) { result = rb_funcall(self, i_to_s, 0); } else { tmp = rb_funcall(self, i_to_s, 0); diff --git a/lib/json/pure/generator.rb b/lib/json/pure/generator.rb index d51316e..2b7eeb7 100644 --- a/lib/json/pure/generator.rb +++ b/lib/json/pure/generator.rb @@ -351,13 +351,13 @@ module JSON def to_json(state = nil, *) case when infinite? - if !state || state.allow_nan? 
+ if state && state.allow_nan? to_s else raise GeneratorError, "#{self} not allowed in JSON" end when nan? - if !state || state.allow_nan? + if state && state.allow_nan? to_s else raise GeneratorError, "#{self} not allowed in JSON" diff --git a/tests/test_json_generate.rb b/tests/test_json_generate.rb index bbb75ba..ae3ce0e 100755 --- a/tests/test_json_generate.rb +++ b/tests/test_json_generate.rb @@ -89,17 +89,17 @@ EOT def test_allow_nan assert_raises(GeneratorError) { generate([JSON::NaN]) } assert_equal '[NaN]', generate([JSON::NaN], :allow_nan => true) - assert_equal '[NaN]', fast_generate([JSON::NaN]) + assert_raises(GeneratorError) { fast_generate([JSON::NaN]) } assert_raises(GeneratorError) { pretty_generate([JSON::NaN]) } assert_equal "[\n NaN\n]", pretty_generate([JSON::NaN], :allow_nan => true) assert_raises(GeneratorError) { generate([JSON::Infinity]) } assert_equal '[Infinity]', generate([JSON::Infinity], :allow_nan => true) - assert_equal '[Infinity]', fast_generate([JSON::Infinity]) + assert_raises(GeneratorError) { fast_generate([JSON::Infinity]) } assert_raises(GeneratorError) { pretty_generate([JSON::Infinity]) } assert_equal "[\n Infinity\n]", pretty_generate([JSON::Infinity], :allow_nan => true) assert_raises(GeneratorError) { generate([JSON::MinusInfinity]) } assert_equal '[-Infinity]', generate([JSON::MinusInfinity], :allow_nan => true) - assert_equal '[-Infinity]', fast_generate([JSON::MinusInfinity]) + assert_raises(GeneratorError) { fast_generate([JSON::MinusInfinity]) } assert_raises(GeneratorError) { pretty_generate([JSON::MinusInfinity]) } assert_equal "[\n -Infinity\n]", pretty_generate([JSON::MinusInfinity], :allow_nan => true) end -- cgit v1.2.1 From d9f9557594840b0381007d2dad769d473adc59f3 Mon Sep 17 00:00:00 2001 From: Florian Frank Date: Thu, 1 Oct 2009 12:05:00 +0200 Subject: added additional checks for generate methods --- lib/json/common.rb | 40 +++++++++++++++++++++++++--------------- tests/test_json.rb | 10 +++++----- 
tests/test_json_generate.rb | 22 ++++++++++++++++++---- tests/test_json_rails.rb | 2 +- 4 files changed, 49 insertions(+), 25 deletions(-) diff --git a/lib/json/common.rb b/lib/json/common.rb index 467c7b2..c7808fb 100644 --- a/lib/json/common.rb +++ b/lib/json/common.rb @@ -105,7 +105,7 @@ module JSON module_function - # Parse the JSON string _source_ into a Ruby data structure and return it. + # Parse the JSON document _source_ into a Ruby data structure and return it. # # _opts_ can have the following # keys: @@ -122,9 +122,9 @@ module JSON JSON.parser.new(source, opts).parse end - # Parse the JSON string _source_ into a Ruby data structure and return it. + # Parse the JSON document _source_ into a Ruby data structure and return it. # The bang version of the parse method, defaults to the more dangerous values - # for the _opts_ hash, so be sure only to parse trusted _source_ strings. + # for the _opts_ hash, so be sure only to parse trusted _source_ documents. # # _opts_ can have the following keys: # * *max_nesting*: The maximum depth of nesting allowed in the parsed data @@ -145,9 +145,8 @@ module JSON JSON.parser.new(source, opts).parse end - # Unparse the Ruby data structure _obj_ into a single line JSON string and - # return it. _state_ is - # * a JSON::State object, + # Generate a JSON document from the Ruby data structure _obj_ and return + # it. _state_ is * a JSON::State object, # * or a Hash like object (responding to to_hash), # * an object convertible into a hash by a to_h method, # that is used as or to configure a State object. @@ -180,7 +179,11 @@ module JSON else state = State.new end - obj.to_json(state) + result = obj.to_json(state) + if result !~ /\A\s*(?:\[.*\]|\{.*\})\s*\Z/m + raise GeneratorError, "only generation of JSON objects or arrays allowed" + end + result end # :stopdoc: @@ -190,14 +193,17 @@ module JSON module_function :unparse # :startdoc: - # Unparse the Ruby data structure _obj_ into a single line JSON string and - # return it. 
This method disables the checks for circles in Ruby objects, and - # also generates NaN, Infinity, and, -Infinity float values. + # Generate a JSON document from the Ruby data structure _obj_ and return it. + # This method disables the checks for circles in Ruby objects. # # *WARNING*: Be careful not to pass any Ruby data structures with circles as # _obj_ argument, because this will cause JSON to go into an infinite loop. def fast_generate(obj) - obj.to_json(nil) + result = obj.to_json(nil) + if result !~ /\A(?:\[.*\]|\{.*\})\Z/ + raise GeneratorError, "only generation of JSON objects or arrays allowed" + end + result end # :stopdoc: @@ -206,8 +212,9 @@ module JSON module_function :fast_unparse # :startdoc: - # Unparse the Ruby data structure _obj_ into a JSON string and return it. The - # returned string is a prettier form of the string returned by #unparse. + # Generate a JSON document from the Ruby data structure _obj_ and return it. + # The returned document is a prettier form of the document returned by + # #unparse. # # The _opts_ argument can be used to configure the generator, see the # generate method for a more detailed explanation. 
@@ -229,7 +236,11 @@ module JSON end state.configure(opts) end - obj.to_json(state) + result = obj.to_json(state) + if result !~ /\A\s*(?:\[.*\]|\{.*\})\s*\Z/m + raise GeneratorError, "only generation of JSON objects or arrays allowed" + end + result end # :stopdoc: @@ -270,7 +281,6 @@ module JSON proc.call result end end - module_function :recurse_proc alias restore load module_function :restore diff --git a/tests/test_json.rb b/tests/test_json.rb index 5307609..5d71d61 100755 --- a/tests/test_json.rb +++ b/tests/test_json.rb @@ -222,27 +222,27 @@ EOT def test_backslash data = [ '\\.(?i:gif|jpe?g|png)$' ] json = '["\\\\.(?i:gif|jpe?g|png)$"]' - assert_equal json, JSON.unparse(data) + assert_equal json, JSON.generate(data) assert_equal data, JSON.parse(json) # data = [ '\\"' ] json = '["\\\\\""]' - assert_equal json, JSON.unparse(data) + assert_equal json, JSON.generate(data) assert_equal data, JSON.parse(json) # json = '["/"]' data = JSON.parse(json) assert_equal ['/'], data - assert_equal json, JSON.unparse(data) + assert_equal json, JSON.generate(data) # json = '["\""]' data = JSON.parse(json) assert_equal ['"'], data - assert_equal json, JSON.unparse(data) + assert_equal json, JSON.generate(data) json = '["\\\'"]' data = JSON.parse(json) assert_equal ["'"], data - assert_equal '["\'"]', JSON.unparse(data) + assert_equal '["\'"]', JSON.generate(data) end def test_wrong_inputs diff --git a/tests/test_json_generate.rb b/tests/test_json_generate.rb index ae3ce0e..e725e6f 100755 --- a/tests/test_json_generate.rb +++ b/tests/test_json_generate.rb @@ -44,8 +44,8 @@ class TC_JSONGenerate < Test::Unit::TestCase EOT end - def test_unparse - json = unparse(@hash) + def test_generate + json = generate(@hash) assert_equal(JSON.parse(@json2), JSON.parse(json)) parsed_json = parse(json) assert_equal(@hash, parsed_json) @@ -53,10 +53,11 @@ EOT assert_equal('{"1":2}', json) parsed_json = parse(json) assert_equal({"1"=>2}, parsed_json) + assert_raise(GeneratorError) { 
generate(666) } end - def test_unparse_pretty - json = pretty_unparse(@hash) + def test_generate_pretty + json = pretty_generate(@hash) assert_equal(JSON.parse(@json3), JSON.parse(json)) parsed_json = parse(json) assert_equal(@hash, parsed_json) @@ -68,6 +69,19 @@ EOT EOT parsed_json = parse(json) assert_equal({"1"=>2}, parsed_json) + assert_raise(GeneratorError) { pretty_generate(666) } + end + + def test_fast_generate + json = fast_generate(@hash) + assert_equal(JSON.parse(@json2), JSON.parse(json)) + parsed_json = parse(json) + assert_equal(@hash, parsed_json) + json = fast_generate({1=>2}) + assert_equal('{"1":2}', json) + parsed_json = parse(json) + assert_equal({"1"=>2}, parsed_json) + assert_raise(GeneratorError) { fast_generate(666) } end def test_states diff --git a/tests/test_json_rails.rb b/tests/test_json_rails.rb index 341d332..d33402d 100755 --- a/tests/test_json_rails.rb +++ b/tests/test_json_rails.rb @@ -141,6 +141,6 @@ EOT end def test_symbol - assert_equal '"foo"', JSON(:foo) # we don't want an object here + assert_equal '"foo"', :foo.to_json # we don't want an object here end end -- cgit v1.2.1 From 03b157516fa22ac135496a3831963f8305f7a0bb Mon Sep 17 00:00:00 2001 From: Florian Frank Date: Thu, 15 Oct 2009 21:02:49 +0200 Subject: implemented utf sniffing, transcoding in parser improved documentation added to changes --- CHANGES | 5 + README | 350 ++++++++++++++++++++++++++++++++++++++---- Rakefile | 54 ++++--- doc-main.txt | 283 ---------------------------------- ext/json/ext/parser/parser.c | 233 +++++++++++++++++----------- ext/json/ext/parser/parser.rl | 85 ++++++++-- lib/json/common.rb | 6 + lib/json/pure/parser.rb | 36 ++++- tests/test_json_encoding.rb | 57 +++++++ 9 files changed, 661 insertions(+), 448 deletions(-) delete mode 100644 doc-main.txt create mode 100644 tests/test_json_encoding.rb diff --git a/CHANGES b/CHANGES index cb67705..d7347db 100644 --- a/CHANGES +++ b/CHANGES @@ -1,5 +1,10 @@ 2009-10-01 (1.2.0) * fast_generate now 
raises an exeception for nan and infinite floats. + * On Ruby 1.8 json supports parsing of UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, + and UTF-32LE JSON documents now. Under Ruby 1.9 the M17n conversion + functions are used to convert from all supported encodings. ASCII-8BIT + encoded strings are handled like all strings under Ruby 1.8 were. + * Better documentation 2009-08-23 (1.1.9) * Added forgotten main doc file extra_rdoc_files. 2009-08-23 (1.1.8) diff --git a/README b/README index 14ef17f..2dd55e2 100644 --- a/README +++ b/README @@ -1,78 +1,360 @@ -Dependencies for Building -========================= +== json - JSON Implementation for Ruby -- You need rake to build the extensions and install them. +=== Description - You can get it from rubyforge: - http://rubyforge.org/projects/rake +This is an implementation of the JSON specification according to RFC 4627 +(http://www.ietf.org/rfc/rfc4627.txt). Starting from version 1.0.0 on there +will be two variants available: - or just type +* A pure ruby variant, that relies on the iconv and the stringscan + extensions, which are both part of the ruby standard library. +* The quite a bit faster C extension variant, which is in parts implemented + in C and comes with its own unicode conversion functions and a parser + generated by the ragel state machine compiler + (http://www.cs.queensu.ca/~thurston/ragel). - # gem install rake +Both variants of the JSON generator escape all non-ASCII and control characters +with \uXXXX escape sequences, and support UTF-16 surrogate pairs in order to be +able to generate the whole range of unicode code points. This means that +the generated JSON document is encoded as UTF-8 (because ASCII is a subset of +UTF-8) and at the same time avoids decoding problems for receiving endpoints, +that don't expect UTF-8 encoded texts. On the negative side this may lead to a +bit longer strings than necessary. - for the installation via rubygems. 
+All strings, that are to be encoded as JSON strings, should be UTF-8 byte +sequences on the Ruby side. To encode raw binary strings, that aren't UTF-8 +encoded, please use the to_json_raw_object method of String (which produces +an object, that contains a byte array) and decode the result on the receiving +endpoint. -- If you want to rebuild the parser.c file or draw nice graphviz images of the - state machines, you need ragel from: - http://www.cs.queensu.ca/~thurston/ragel +The JSON parsers can parse UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, and UTF-32LE +JSON documents under Ruby 1.8. Under Ruby 1.9 they take advantage of Ruby's +M17n features and can parse all documents which have the correct +String#encoding set. If a document string has ASCII-8BIT as an encoding the +parser attempts to figure out which of the UTF encodings from above it is and +tries to parse it. -Installation -============ +=== Installation -It's recommended to use the extension variant of JSON, because it's quite a bit -faster than the pure ruby variant. If you cannot build it on your system, you -can settle for the latter. +It's recommended to use the extension variant of JSON, because it's faster than +the pure ruby variant. If you cannot build it on your system, you can settle +for the latter. Just type into the command line as root: -# rake install + # rake install The above command will build the extensions and install them on your system. -# rake install_pure + # rake install_pure or -# ruby install.rb + # ruby install.rb will just install the pure ruby implementation of JSON. If you use Rubygems you can type -# gem install json + # gem install json instead, to install the newest JSON version. 
There is also a pure ruby json only variant of the gem, that can be installed with: -# gem install json_pure + # gem install json_pure + +=== Compiling the extensions yourself + +If you want to build the extensions yourself you need rake: + + You can get it from rubyforge: + http://rubyforge.org/projects/rake + + or just type + + # gem install rake + + for the installation via rubygems. + +If you want to create the parser.c file from its parser.rl file or draw nice +graphviz images of the state machines, you need ragel from: http://www.cs.queensu.ca/~thurston/ragel + + +=== Usage + +To use JSON you can + require 'json' +to load the installed variant (either the extension 'json' or the pure +variant 'json_pure'). If you have installed the extension variant, you can +pick either the extension variant or the pure variant by typing + require 'json/ext' +or + require 'json/pure' + +Now you can parse a JSON document into a ruby data structure by calling + + JSON.parse(document) + +If you want to generate a JSON document from a ruby data structure call + JSON.generate(data) + +You can also use the pretty_generate method (which formats the output more +verbosely and nicely) or fast_generate (which doesn't do any of the security +checks generate performs, e. g. nesting deepness checks). + +To create a valid JSON document you have to make sure, that the output is +embedded in either a JSON array [] or a JSON object {}. The easiest way to do +this, is by putting your values in a Ruby Array or Hash instance. 
+ +There are also the JSON and JSON[] methods which use parse on a String or +generate a JSON document from an array or hash: + + document = JSON 'test' => 23 # => "{\"test\":23}" + document = JSON['test'] => 23 # => "{\"test\":23}" + +and + + data = JSON '{"test":23}' # => {"test"=>23} + data = JSON['{"test":23}'] # => {"test"=>23} + +You can choose to load a set of common additions to ruby core's objects if +you + require 'json/add/core' + +After requiring this you can, e. g., serialise/deserialise Ruby ranges: -Testing and Examples -==================== + JSON JSON(1..10) # => 1..10 -To run the tests type: +To find out how to add JSON support to other or your own classes, read the +section "More Examples" below. -$ rake test_ext +To get the best compatibility to rails' JSON implementation, you can + require 'json/add/rails' -This will build the extensions first and then test them. +Both of the additions attempt to require 'json' (like above) first, if it has +not been required yet. -$ rake test_pure +=== More Examples -This will test the pure ruby extensions. +To create a JSON document from a ruby data structure, you can call +JSON.generate like that: -There is also a small example in tools/server.rb if you want to see, how + json = JSON.generate [1, 2, {"a"=>3.141}, false, true, nil, 4..10] + # => "[1,2,{\"a\":3.141},false,true,null,\"4..10\"]" + +To get back a ruby data structure from a JSON document, you have to call +JSON.parse on it: + + JSON.parse json + # => [1, 2, {"a"=>3.141}, false, true, nil, "4..10"] + +Note, that the range from the original data structure is a simple +string now. The reason for this is, that JSON doesn't support ranges +or arbitrary classes. In this case the json library falls back to call +Object#to_json, which is the same as #to_s.to_json. 
+ +It's possible to add JSON serialization support to arbitrary classes by +simply implementing a more specialized version of the #to_json method, that +should return a JSON object (a hash converted to JSON with #to_json) like +this (don't forget the *a for all the arguments): + + class Range + def to_json(*a) + { + 'json_class' => self.class.name, # = 'Range' + 'data' => [ first, last, exclude_end? ] + }.to_json(*a) + end + end + +The hash key 'json_class' is the class, that will be asked to deserialise the +JSON representation later. In this case it's 'Range', but any namespace of +the form 'A::B' or '::A::B' will do. All other keys are arbitrary and can be +used to store the necessary data to configure the object to be deserialised. + +If the key 'json_class' is found in a JSON object, the JSON parser checks +if the given class responds to the json_create class method. If so, it is +called with the JSON object converted to a Ruby hash. So a range can +be deserialised by implementing Range.json_create like this: + + class Range + def self.json_create(o) + new(*o['data']) + end + end + +Now it is possible to serialise/deserialise ranges as well: + + json = JSON.generate [1, 2, {"a"=>3.141}, false, true, nil, 4..10] + # => "[1,2,{\"a\":3.141},false,true,null,{\"json_class\":\"Range\",\"data\":[4,10,false]}]" + JSON.parse json + # => [1, 2, {"a"=>3.141}, false, true, nil, 4..10] + +JSON.generate always creates the shortest possible string representation of a +ruby data structure in one line. This is good for data storage or network +protocols, but not so good for humans to read. 
Fortunately there's also +JSON.pretty_generate (or JSON.pretty_unparse) that creates a more readable +output: + + puts JSON.pretty_generate([1, 2, {"a"=>3.141}, false, true, nil, 4..10]) + [ + 1, + 2, + { + "a": 3.141 + }, + false, + true, + null, + { + "json_class": "Range", + "data": [ + 4, + 10, + false + ] + } + ] + +There are also the methods Kernel#j for generate, and Kernel#jj for +pretty_generate output to the console, that work analogous to Core Ruby's p and +the pp library's pp methods. + +The script tools/server.rb contains a small example if you want to test, how receiving a JSON object from a webrick server in your browser with the javasript prototype library (http://www.prototypejs.org) works. -Author -====== +=== Speed Comparisons + +I have created some benchmark results (see the benchmarks/data-p4-3Ghz +subdir of the package) for the JSON-parser to estimate the speed up in the C +extension: + + Comparing times (call_time_mean): + 1 ParserBenchmarkExt#parser 900 repeats: + 553.922304770 ( real) -> 21.500x + 0.001805307 + 2 ParserBenchmarkYAML#parser 1000 repeats: + 224.513358139 ( real) -> 8.714x + 0.004454078 + 3 ParserBenchmarkPure#parser 1000 repeats: + 26.755020642 ( real) -> 1.038x + 0.037376163 + 4 ParserBenchmarkRails#parser 1000 repeats: + 25.763381731 ( real) -> 1.000x + 0.038814780 + calls/sec ( time) -> speed covers + secs/call -Florian Frank +In the table above 1 is JSON::Ext::Parser, 2 is YAML.load with YAML +compatible JSON document, 3 is JSON::Pure::Parser, and 4 is +ActiveSupport::JSON.decode. The ActiveSupport JSON-decoder converts the +input first to YAML and then uses the YAML-parser, the conversion seems to +slow it down so much that it is only as fast as the JSON::Pure::Parser! 
-License -======= +If you look at the benchmark data you can see that this is mostly caused by +the frequent high outliers - the median of the Rails-parser runs is still +overall smaller than the median of the JSON::Pure::Parser runs: + + Comparing times (call_time_median): + 1 ParserBenchmarkExt#parser 900 repeats: + 800.592479481 ( real) -> 26.936x + 0.001249075 + 2 ParserBenchmarkYAML#parser 1000 repeats: + 271.002390644 ( real) -> 9.118x + 0.003690004 + 3 ParserBenchmarkRails#parser 1000 repeats: + 30.227910865 ( real) -> 1.017x + 0.033082008 + 4 ParserBenchmarkPure#parser 1000 repeats: + 29.722384421 ( real) -> 1.000x + 0.033644676 + calls/sec ( time) -> speed covers + secs/call + +I have benchmarked the JSON-Generator as well. This generated a few more +values, because there are different modes that also influence the achieved +speed: + + Comparing times (call_time_mean): + 1 GeneratorBenchmarkExt#generator_fast 1000 repeats: + 547.354332608 ( real) -> 15.090x + 0.001826970 + 2 GeneratorBenchmarkExt#generator_safe 1000 repeats: + 443.968212317 ( real) -> 12.240x + 0.002252414 + 3 GeneratorBenchmarkExt#generator_pretty 900 repeats: + 375.104545883 ( real) -> 10.341x + 0.002665923 + 4 GeneratorBenchmarkPure#generator_fast 1000 repeats: + 49.978706968 ( real) -> 1.378x + 0.020008521 + 5 GeneratorBenchmarkRails#generator 1000 repeats: + 38.531868759 ( real) -> 1.062x + 0.025952543 + 6 GeneratorBenchmarkPure#generator_safe 1000 repeats: + 36.927649925 ( real) -> 1.018x 7 (>=3859) + 0.027079979 + 7 GeneratorBenchmarkPure#generator_pretty 1000 repeats: + 36.272134441 ( real) -> 1.000x 6 (>=3859) + 0.027569373 + calls/sec ( time) -> speed covers + secs/call + +In the table above 1-3 are JSON::Ext::Generator methods. 4, 6, and 7 are +JSON::Pure::Generator methods and 5 is the Rails JSON generator. It is now a +bit faster than the generator_safe and generator_pretty methods of the pure +variant but slower than the others. 
+ +To achieve the fastest JSON document output, you can use the fast_generate +method. Beware, that this will disable the checking for circular Ruby data +structures, which may cause JSON to go into an infinite loop. + +Here are the median comparisons for completeness' sake: + + Comparing times (call_time_median): + 1 GeneratorBenchmarkExt#generator_fast 1000 repeats: + 708.258020939 ( real) -> 16.547x + 0.001411915 + 2 GeneratorBenchmarkExt#generator_safe 1000 repeats: + 569.105020353 ( real) -> 13.296x + 0.001757145 + 3 GeneratorBenchmarkExt#generator_pretty 900 repeats: + 482.825371244 ( real) -> 11.280x + 0.002071142 + 4 GeneratorBenchmarkPure#generator_fast 1000 repeats: + 62.717626652 ( real) -> 1.465x + 0.015944481 + 5 GeneratorBenchmarkRails#generator 1000 repeats: + 43.965681162 ( real) -> 1.027x + 0.022745013 + 6 GeneratorBenchmarkPure#generator_safe 1000 repeats: + 43.929073409 ( real) -> 1.026x 7 (>=3859) + 0.022763968 + 7 GeneratorBenchmarkPure#generator_pretty 1000 repeats: + 42.802514491 ( real) -> 1.000x 6 (>=3859) + 0.023363113 + calls/sec ( time) -> speed covers + secs/call + +=== Author + +Florian Frank + +=== License Ruby License, see the COPYING file included in the source distribution. The Ruby License includes the GNU General Public License (GPL), Version 2, so see the file GPL as well. + +=== Download + +The latest version of this library can be downloaded at + +* http://rubyforge.org/frs?group_id=953 + +Online Documentation should be located at + +* http://json.rubyforge.org diff --git a/Rakefile b/Rakefile index f8849c1..156970f 100644 --- a/Rakefile +++ b/Rakefile @@ -28,9 +28,19 @@ CLEAN.include FileList['diagrams/*.*'], 'doc', 'coverage', 'tmp', FileList["ext/**/{Makefile,mkmf.log}"], FileList["{ext,lib}/**/*.{so,bundle,#{CONFIG['DLEXT']},o,obj,pdb,lib,manifest,exp,def}"] +def myruby(*args, &block) + @myruby ||= File.join(CONFIG['bindir'], CONFIG['ruby_install_name']) + options = (Hash === args.last) ? 
args.pop : {} + if args.length > 1 then + sh(*([@myruby] + args + [options]), &block) + else + sh("#{@myruby} #{args.first}", options, &block) + end +end + desc "Installing library (pure)" task :install_pure => :version do - ruby 'install.rb' + myruby 'install.rb' end task :install_ext_really do @@ -55,16 +65,16 @@ task :compile_ext => [ EXT_PARSER_DL, EXT_GENERATOR_DL ] file EXT_PARSER_DL => EXT_PARSER_SRC do cd EXT_PARSER_DIR do - ruby 'extconf.rb' - system MAKE + myruby 'extconf.rb' + sh MAKE end cp "#{EXT_PARSER_DIR}/parser.#{CONFIG['DLEXT']}", EXT_ROOT_DIR end file EXT_GENERATOR_DL => EXT_GENERATOR_SRC do cd EXT_GENERATOR_DIR do - ruby 'extconf.rb' - system MAKE + myruby 'extconf.rb' + sh MAKE end cp "#{EXT_GENERATOR_DIR}/generator.#{CONFIG['DLEXT']}", EXT_ROOT_DIR end @@ -79,9 +89,9 @@ end file EXT_PARSER_SRC => RAGEL_PATH do cd EXT_PARSER_DIR do if RAGEL_CODEGEN == 'ragel' - system "ragel parser.rl -G2 -o parser.c" + sh "ragel parser.rl -G2 -o parser.c" else - system "ragel -x parser.rl | #{RAGEL_CODEGEN} -G2" + sh "ragel -x parser.rl | #{RAGEL_CODEGEN} -G2" end end end @@ -93,9 +103,9 @@ task :ragel_dot_ps do File.new(RAGEL_PATH).grep(/^\s*machine\s*(\S+);\s*$/) { specs << $1 } for s in specs if RAGEL_DOTGEN == 'ragel' - system "ragel #{RAGEL_PATH} -S#{s} -p -V | dot -Tps -o#{root}/#{s}.ps" + sh "ragel #{RAGEL_PATH} -S#{s} -p -V | dot -Tps -o#{root}/#{s}.ps" else - system "ragel -x #{RAGEL_PATH} -S#{s} | #{RAGEL_DOTGEN} -p|dot -Tps -o#{root}/#{s}.ps" + sh "ragel -x #{RAGEL_PATH} -S#{s} | #{RAGEL_DOTGEN} -p|dot -Tps -o#{root}/#{s}.ps" end end end @@ -107,9 +117,9 @@ task :ragel_dot_png do File.new(RAGEL_PATH).grep(/^\s*machine\s*(\S+);\s*$/) { specs << $1 } for s in specs if RAGEL_DOTGEN == 'ragel' - system "ragel #{RAGEL_PATH} -S#{s} -p -V | dot -Tpng -o#{root}/#{s}.png" + sh "ragel #{RAGEL_PATH} -S#{s} -p -V | dot -Tpng -o#{root}/#{s}.png" else - system "ragel -x #{RAGEL_PATH} -S#{s} | #{RAGEL_DOTGEN} -p|dot -Tpng -o#{root}/#{s}.png" + sh "ragel -x 
#{RAGEL_PATH} -S#{s} | #{RAGEL_DOTGEN} -p|dot -Tpng -o#{root}/#{s}.png" end end end @@ -121,14 +131,14 @@ desc "Testing library (pure ruby)" task :test_pure => :clean do ENV['JSON'] = 'pure' ENV['RUBYOPT'] = "-Iext:lib #{ENV['RUBYOPT']}" - system "testrb #{Dir['tests/*.rb'] * ' '}" + myruby "-S testrb #{Dir['./tests/*.rb'] * ' '}" end desc "Testing library (extension)" task :test_ext => :compile_ext do ENV['JSON'] = 'ext' ENV['RUBYOPT'] = "-Iext:lib #{ENV['RUBYOPT']}" - system "testrb #{Dir['tests/*.rb'] * ' '}" + myruby "-S testrb #{Dir['./tests/*.rb'] * ' '}" end desc "Testing library (pure ruby and extension)" @@ -137,13 +147,13 @@ task :test => [ :test_pure, :test_ext ] desc "Benchmarking parser" task :benchmark_parser do ENV['RUBYOPT'] = "-Ilib:ext #{ENV['RUBYOPT']}" - ruby 'benchmarks/parser_benchmark.rb' + myruby 'benchmarks/parser_benchmark.rb' end desc "Benchmarking generator" task :benchmark_generator do ENV['RUBYOPT'] = "-Ilib:ext #{ENV['RUBYOPT']}" - ruby 'benchmarks/generator_benchmark.rb' + myruby 'benchmarks/generator_benchmark.rb' end desc "Benchmarking library" @@ -151,7 +161,7 @@ task :benchmark => [ :benchmark_parser, :benchmark_generator ] desc "Create RDOC documentation" task :doc => [ :version, EXT_PARSER_SRC ] do - system "rdoc -o doc -m doc-main.txt doc-main.txt lib/json.rb #{FileList['lib/json/**/*.rb']} #{EXT_PARSER_SRC} #{EXT_GENERATOR_SRC}" + sh "rdoc -o doc -m README README lib/json.rb #{FileList['lib/json/**/*.rb']} #{EXT_PARSER_SRC} #{EXT_GENERATOR_SRC}" end if defined?(Gem) and defined?(Rake::GemPackageTask) and defined?(Rake::ExtensionTask) @@ -170,9 +180,9 @@ if defined?(Gem) and defined?(Rake::GemPackageTask) and defined?(Rake::Extension s.default_executable = "edit_json.rb" s.has_rdoc = true - s.extra_rdoc_files << 'doc-main.txt' + s.extra_rdoc_files << 'README' s.rdoc_options << - '--title' << 'JSON -- A JSON implemention' << '--main' << 'doc-main.txt' + '--title' << 'JSON -- A JSON implemention' << '--main' << 'README' 
s.test_files.concat Dir['tests/*.rb'] s.author = "Florian Frank" @@ -205,9 +215,9 @@ if defined?(Gem) and defined?(Rake::GemPackageTask) and defined?(Rake::Extension s.default_executable = "edit_json.rb" s.has_rdoc = true - s.extra_rdoc_files << 'doc-main.txt' + s.extra_rdoc_files << 'README' s.rdoc_options << - '--title' << 'JSON -- A JSON implemention' << '--main' << 'doc-main.txt' + '--title' << 'JSON -- A JSON implemention' << '--main' << 'README' s.test_files.concat Dir['tests/*.rb'] s.author = "Florian Frank" @@ -259,8 +269,8 @@ end desc "Build all gems and archives for a new release." task :release => [ :clean, :version, :cross, :native, :gem ] do - system "#$0 clean native gem" - system "#$0 clean package" + sh "#$0 clean native gem" + sh "#$0 clean package" end desc "Compile in the the source directory" diff --git a/doc-main.txt b/doc-main.txt deleted file mode 100644 index 8a0bc55..0000000 --- a/doc-main.txt +++ /dev/null @@ -1,283 +0,0 @@ -== json - JSON Implementation for Ruby - -=== Description - -This is a implementation of the JSON specification according to RFC 4627 -(http://www.ietf.org/rfc/rfc4627.txt). Starting from version 1.0.0 on there -will be two variants available: - -* A pure ruby variant, that relies on the iconv and the stringscan - extensions, which are both part of the ruby standard library. -* The quite a bit faster C extension variant, which is in parts implemented - in C and comes with its own unicode conversion functions and a parser - generated by the ragel state machine compiler - (http://www.cs.queensu.ca/~thurston/ragel). - -Both variants of the JSON generator escape all non-ASCII an control -characters with \uXXXX escape sequences, and support UTF-16 surrogate pairs -in order to be able to generate the whole range of unicode code points. 
This -means that generated JSON text is encoded as UTF-8 (because ASCII is a subset -of UTF-8) and at the same time avoids decoding problems for receiving -endpoints, that don't expect UTF-8 encoded texts. On the negative side this -may lead to a bit longer strings than necessarry. - -All strings, that are to be encoded as JSON strings, should be UTF-8 byte -sequences on the Ruby side. To encode raw binary strings, that aren't UTF-8 -encoded, please use the to_json_raw_object method of String (which produces -an object, that contains a byte array) and decode the result on the receiving -endpoint. - -=== Author - -Florian Frank - -=== License - -This software is distributed under the same license as Ruby itself, see -http://www.ruby-lang.org/en/LICENSE.txt. - -=== Download - -The latest version of this library can be downloaded at - -* http://rubyforge.org/frs?group_id=953 - -Online Documentation should be located at - -* http://json.rubyforge.org - -=== Usage - -To use JSON you can - require 'json' -to load the installed variant (either the extension 'json' or the pure -variant 'json_pure'). If you have installed the extension variant, you can -pick either the extension variant or the pure variant by typing - require 'json/ext' -or - require 'json/pure' - -You can choose to load a set of common additions to ruby core's objects if -you - require 'json/add/core' - -After requiring this you can, e. g., serialise/deserialise Ruby ranges: - - JSON JSON(1..10) # => 1..10 - -To find out how to add JSON support to other or your own classes, read the -Examples section below. - -To get the best compatibility to rails' JSON implementation, you can - require 'json/add/rails' - -Both of the additions attempt to require 'json' (like above) first, if it has -not been required yet. 
- -=== Speed Comparisons - -I have created some benchmark results (see the benchmarks/data-p4-3Ghz -subdir of the package) for the JSON-parser to estimate the speed up in the C -extension: - - Comparing times (call_time_mean): - 1 ParserBenchmarkExt#parser 900 repeats: - 553.922304770 ( real) -> 21.500x - 0.001805307 - 2 ParserBenchmarkYAML#parser 1000 repeats: - 224.513358139 ( real) -> 8.714x - 0.004454078 - 3 ParserBenchmarkPure#parser 1000 repeats: - 26.755020642 ( real) -> 1.038x - 0.037376163 - 4 ParserBenchmarkRails#parser 1000 repeats: - 25.763381731 ( real) -> 1.000x - 0.038814780 - calls/sec ( time) -> speed covers - secs/call - -In the table above 1 is JSON::Ext::Parser, 2 is YAML.load with YAML -compatbile JSON document, 3 is is JSON::Pure::Parser, and 4 is -ActiveSupport::JSON.decode. The ActiveSupport JSON-decoder converts the -input first to YAML and then uses the YAML-parser, the conversion seems to -slow it down so much that it is only as fast as the JSON::Pure::Parser! - -If you look at the benchmark data you can see that this is mostly caused by -the frequent high outliers - the median of the Rails-parser runs is still -overall smaller than the median of the JSON::Pure::Parser runs: - - Comparing times (call_time_median): - 1 ParserBenchmarkExt#parser 900 repeats: - 800.592479481 ( real) -> 26.936x - 0.001249075 - 2 ParserBenchmarkYAML#parser 1000 repeats: - 271.002390644 ( real) -> 9.118x - 0.003690004 - 3 ParserBenchmarkRails#parser 1000 repeats: - 30.227910865 ( real) -> 1.017x - 0.033082008 - 4 ParserBenchmarkPure#parser 1000 repeats: - 29.722384421 ( real) -> 1.000x - 0.033644676 - calls/sec ( time) -> speed covers - secs/call - -I have benchmarked the JSON-Generator as well. 
This generated a few more -values, because there are different modes that also influence the achieved -speed: - - Comparing times (call_time_mean): - 1 GeneratorBenchmarkExt#generator_fast 1000 repeats: - 547.354332608 ( real) -> 15.090x - 0.001826970 - 2 GeneratorBenchmarkExt#generator_safe 1000 repeats: - 443.968212317 ( real) -> 12.240x - 0.002252414 - 3 GeneratorBenchmarkExt#generator_pretty 900 repeats: - 375.104545883 ( real) -> 10.341x - 0.002665923 - 4 GeneratorBenchmarkPure#generator_fast 1000 repeats: - 49.978706968 ( real) -> 1.378x - 0.020008521 - 5 GeneratorBenchmarkRails#generator 1000 repeats: - 38.531868759 ( real) -> 1.062x - 0.025952543 - 6 GeneratorBenchmarkPure#generator_safe 1000 repeats: - 36.927649925 ( real) -> 1.018x 7 (>=3859) - 0.027079979 - 7 GeneratorBenchmarkPure#generator_pretty 1000 repeats: - 36.272134441 ( real) -> 1.000x 6 (>=3859) - 0.027569373 - calls/sec ( time) -> speed covers - secs/call - -In the table above 1-3 are JSON::Ext::Generator methods. 4, 6, and 7 are -JSON::Pure::Generator methods and 5 is the Rails JSON generator. It is now a -bit faster than the generator_safe and generator_pretty methods of the pure -variant but slower than the others. - -To achieve the fastest JSON text output, you can use the fast_generate -method. Beware, that this will disable the checking for circular Ruby data -structures, which may cause JSON to go into an infinite loop. 
- -Here are the median comparisons for completeness' sake: - - Comparing times (call_time_median): - 1 GeneratorBenchmarkExt#generator_fast 1000 repeats: - 708.258020939 ( real) -> 16.547x - 0.001411915 - 2 GeneratorBenchmarkExt#generator_safe 1000 repeats: - 569.105020353 ( real) -> 13.296x - 0.001757145 - 3 GeneratorBenchmarkExt#generator_pretty 900 repeats: - 482.825371244 ( real) -> 11.280x - 0.002071142 - 4 GeneratorBenchmarkPure#generator_fast 1000 repeats: - 62.717626652 ( real) -> 1.465x - 0.015944481 - 5 GeneratorBenchmarkRails#generator 1000 repeats: - 43.965681162 ( real) -> 1.027x - 0.022745013 - 6 GeneratorBenchmarkPure#generator_safe 1000 repeats: - 43.929073409 ( real) -> 1.026x 7 (>=3859) - 0.022763968 - 7 GeneratorBenchmarkPure#generator_pretty 1000 repeats: - 42.802514491 ( real) -> 1.000x 6 (>=3859) - 0.023363113 - calls/sec ( time) -> speed covers - secs/call - -=== Examples - -To create a JSON text from a ruby data structure, you can call JSON.generate -like that: - - json = JSON.generate [1, 2, {"a"=>3.141}, false, true, nil, 4..10] - # => "[1,2,{\"a\":3.141},false,true,null,\"4..10\"]" - -To create a valid JSON text you have to make sure, that the output is -embedded in either a JSON array [] or a JSON object {}. The easiest way to do -this, is by putting your values in a Ruby Array or Hash instance. - -To get back a ruby data structure from a JSON text, you have to call -JSON.parse on it: - - JSON.parse json - # => [1, 2, {"a"=>3.141}, false, true, nil, "4..10"] - -Note, that the range from the original data structure is a simple -string now. The reason for this is, that JSON doesn't support ranges -or arbitrary classes. In this case the json library falls back to call -Object#to_json, which is the same as #to_s.to_json. 
- -It's possible to add JSON support serialization to arbitrary classes by -simply implementing a more specialized version of the #to_json method, that -should return a JSON object (a hash converted to JSON with #to_json) like -this (don't forget the *a for all the arguments): - - class Range - def to_json(*a) - { - 'json_class' => self.class.name, # = 'Range' - 'data' => [ first, last, exclude_end? ] - }.to_json(*a) - end - end - -The hash key 'json_class' is the class, that will be asked to deserialise the -JSON representation later. In this case it's 'Range', but any namespace of -the form 'A::B' or '::A::B' will do. All other keys are arbitrary and can be -used to store the necessary data to configure the object to be deserialised. - -If a the key 'json_class' is found in a JSON object, the JSON parser checks -if the given class responds to the json_create class method. If so, it is -called with the JSON object converted to a Ruby hash. So a range can -be deserialised by implementing Range.json_create like this: - - class Range - def self.json_create(o) - new(*o['data']) - end - end - -Now it possible to serialise/deserialise ranges as well: - - json = JSON.generate [1, 2, {"a"=>3.141}, false, true, nil, 4..10] - # => "[1,2,{\"a\":3.141},false,true,null,{\"json_class\":\"Range\",\"data\":[4,10,false]}]" - JSON.parse json - # => [1, 2, {"a"=>3.141}, false, true, nil, 4..10] - -JSON.generate always creates the shortest possible string representation of a -ruby data structure in one line. This good for data storage or network -protocols, but not so good for humans to read. 
Fortunately there's also -JSON.pretty_generate (or JSON.pretty_generate) that creates a more -readable output: - - puts JSON.pretty_generate([1, 2, {"a"=>3.141}, false, true, nil, 4..10]) - [ - 1, - 2, - { - "a": 3.141 - }, - false, - true, - null, - { - "json_class": "Range", - "data": [ - 4, - 10, - false - ] - } - ] - -There are also the methods Kernel#j for generate, and Kernel#jj for -pretty_generate output to the console, that work analogous to Core Ruby's p -and the pp library's pp methods. - -The script tools/server.rb contains a small example if you want to test, how -receiving a JSON object from a webrick server in your browser with the -javasript prototype library (http://www.prototypejs.org) works. - diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 6851e06..1781381 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -21,15 +21,19 @@ #ifdef HAVE_RUBY_ENCODING_H #include "ruby/encoding.h" #define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding()) +static VALUE mEncoding_ASCII_8BIT, mEncoding_UTF_8, mEncoding_UTF_16BE, + mEncoding_UTF_16LE, mEncoding_UTF_32BE, mEncoding_UTF_32LE; +static ID i_encoding, i_encode, i_encode_bang, i_force_encoding; #else #define FORCE_UTF8(obj) +static ID i_iconv; #endif static VALUE mJSON, mExt, cParser, eParserError, eNestingError; static VALUE CNaN, CInfinity, CMinusInfinity; static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, - i_chr, i_max_nesting, i_allow_nan, i_object_class, i_array_class; + i_chr, i_max_nesting, i_allow_nan, i_object_class, i_array_class; #define MinusInfinity "-Infinity" @@ -58,11 +62,11 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul Data_Get_Struct(self, JSON_Parser, json); -#line 84 "parser.rl" +#line 88 "parser.rl" -#line 66 "parser.c" +#line 70 "parser.c" static const int JSON_object_start = 1; static const int JSON_object_first_final = 27; static const int 
JSON_object_error = 0; @@ -70,7 +74,7 @@ static const int JSON_object_error = 0; static const int JSON_object_en_main = 1; -#line 117 "parser.rl" +#line 121 "parser.rl" static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -86,14 +90,14 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class); -#line 90 "parser.c" +#line 94 "parser.c" { cs = JSON_object_start; } -#line 132 "parser.rl" +#line 136 "parser.rl" -#line 97 "parser.c" +#line 101 "parser.c" { if ( p == pe ) goto _test_eof; @@ -121,7 +125,7 @@ case 2: goto st2; goto st0; tr2: -#line 103 "parser.rl" +#line 107 "parser.rl" { char *np = JSON_parse_string(json, p, pe, &last_name); if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else {p = (( np))-1;} @@ -131,7 +135,7 @@ st3: if ( ++p == pe ) goto _test_eof3; case 3: -#line 135 "parser.c" +#line 139 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -198,7 +202,7 @@ case 8: goto st8; goto st0; tr11: -#line 92 "parser.rl" +#line 96 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v); @@ -214,7 +218,7 @@ st9: if ( ++p == pe ) goto _test_eof9; case 9: -#line 218 "parser.c" +#line 222 "parser.c" switch( (*p) ) { case 13: goto st9; case 32: goto st9; @@ -303,14 +307,14 @@ case 18: goto st9; goto st18; tr4: -#line 108 "parser.rl" +#line 112 "parser.rl" { p--; {p++; cs = 27; goto _out;} } goto st27; st27: if ( ++p == pe ) goto _test_eof27; case 27: -#line 314 "parser.c" +#line 318 "parser.c" goto st0; st19: if ( ++p == pe ) @@ -408,7 +412,7 @@ case 26: _out: {} } -#line 133 "parser.rl" +#line 137 "parser.rl" if (cs >= JSON_object_first_final) { if (RTEST(json->create_id)) { @@ -427,7 +431,7 @@ case 26: } -#line 431 "parser.c" +#line 435 "parser.c" static const int JSON_value_start = 1; static const int JSON_value_first_final = 21; static const int JSON_value_error = 0; @@ -435,7 
+439,7 @@ static const int JSON_value_error = 0; static const int JSON_value_en_main = 1; -#line 231 "parser.rl" +#line 235 "parser.rl" static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -443,14 +447,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 447 "parser.c" +#line 451 "parser.c" { cs = JSON_value_start; } -#line 238 "parser.rl" +#line 242 "parser.rl" -#line 454 "parser.c" +#line 458 "parser.c" { if ( p == pe ) goto _test_eof; @@ -475,14 +479,14 @@ st0: cs = 0; goto _out; tr0: -#line 179 "parser.rl" +#line 183 "parser.rl" { char *np = JSON_parse_string(json, p, pe, result); if (np == NULL) { p--; {p++; cs = 21; goto _out;} } else {p = (( np))-1;} } goto st21; tr2: -#line 184 "parser.rl" +#line 188 "parser.rl" { char *np; if(pe > p + 9 && !strncmp(MinusInfinity, p, 9)) { @@ -502,7 +506,7 @@ tr2: } goto st21; tr5: -#line 202 "parser.rl" +#line 206 "parser.rl" { char *np; json->current_nesting++; @@ -512,7 +516,7 @@ tr5: } goto st21; tr9: -#line 210 "parser.rl" +#line 214 "parser.rl" { char *np; json->current_nesting++; @@ -522,7 +526,7 @@ tr9: } goto st21; tr16: -#line 172 "parser.rl" +#line 176 "parser.rl" { if (json->allow_nan) { *result = CInfinity; @@ -532,7 +536,7 @@ tr16: } goto st21; tr18: -#line 165 "parser.rl" +#line 169 "parser.rl" { if (json->allow_nan) { *result = CNaN; @@ -542,19 +546,19 @@ tr18: } goto st21; tr22: -#line 159 "parser.rl" +#line 163 "parser.rl" { *result = Qfalse; } goto st21; tr25: -#line 156 "parser.rl" +#line 160 "parser.rl" { *result = Qnil; } goto st21; tr28: -#line 162 "parser.rl" +#line 166 "parser.rl" { *result = Qtrue; } @@ -563,9 +567,9 @@ st21: if ( ++p == pe ) goto _test_eof21; case 21: -#line 218 "parser.rl" +#line 222 "parser.rl" { p--; {p++; cs = 21; goto _out;} } -#line 569 "parser.c" +#line 573 "parser.c" goto st0; st2: if ( ++p == pe ) @@ -726,7 +730,7 @@ case 20: _out: {} } -#line 239 "parser.rl" +#line 243 "parser.rl" if 
(cs >= JSON_value_first_final) { return p; @@ -736,7 +740,7 @@ case 20: } -#line 740 "parser.c" +#line 744 "parser.c" static const int JSON_integer_start = 1; static const int JSON_integer_first_final = 5; static const int JSON_integer_error = 0; @@ -744,7 +748,7 @@ static const int JSON_integer_error = 0; static const int JSON_integer_en_main = 1; -#line 255 "parser.rl" +#line 259 "parser.rl" static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -752,15 +756,15 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res int cs = EVIL; -#line 756 "parser.c" +#line 760 "parser.c" { cs = JSON_integer_start; } -#line 262 "parser.rl" +#line 266 "parser.rl" json->memo = p; -#line 764 "parser.c" +#line 768 "parser.c" { if ( p == pe ) goto _test_eof; @@ -794,14 +798,14 @@ case 3: goto st0; goto tr4; tr4: -#line 252 "parser.rl" +#line 256 "parser.rl" { p--; {p++; cs = 5; goto _out;} } goto st5; st5: if ( ++p == pe ) goto _test_eof5; case 5: -#line 805 "parser.c" +#line 809 "parser.c" goto st0; st4: if ( ++p == pe ) @@ -820,7 +824,7 @@ case 4: _out: {} } -#line 264 "parser.rl" +#line 268 "parser.rl" if (cs >= JSON_integer_first_final) { long len = p - json->memo; @@ -832,7 +836,7 @@ case 4: } -#line 836 "parser.c" +#line 840 "parser.c" static const int JSON_float_start = 1; static const int JSON_float_first_final = 10; static const int JSON_float_error = 0; @@ -840,7 +844,7 @@ static const int JSON_float_error = 0; static const int JSON_float_en_main = 1; -#line 286 "parser.rl" +#line 290 "parser.rl" static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -848,15 +852,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 852 "parser.c" +#line 856 "parser.c" { cs = JSON_float_start; } -#line 293 "parser.rl" +#line 297 "parser.rl" json->memo = p; -#line 860 "parser.c" +#line 864 "parser.c" { if ( p == pe ) goto _test_eof; @@ -914,14 
+918,14 @@ case 5: goto st0; goto tr7; tr7: -#line 280 "parser.rl" +#line 284 "parser.rl" { p--; {p++; cs = 10; goto _out;} } goto st10; st10: if ( ++p == pe ) goto _test_eof10; case 10: -#line 925 "parser.c" +#line 929 "parser.c" goto st0; st6: if ( ++p == pe ) @@ -982,7 +986,7 @@ case 9: _out: {} } -#line 295 "parser.rl" +#line 299 "parser.rl" if (cs >= JSON_float_first_final) { long len = p - json->memo; @@ -995,7 +999,7 @@ case 9: -#line 999 "parser.c" +#line 1003 "parser.c" static const int JSON_array_start = 1; static const int JSON_array_first_final = 17; static const int JSON_array_error = 0; @@ -1003,7 +1007,7 @@ static const int JSON_array_error = 0; static const int JSON_array_en_main = 1; -#line 331 "parser.rl" +#line 335 "parser.rl" static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -1017,14 +1021,14 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class); -#line 1021 "parser.c" +#line 1025 "parser.c" { cs = JSON_array_start; } -#line 344 "parser.rl" +#line 348 "parser.rl" -#line 1028 "parser.c" +#line 1032 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1063,7 +1067,7 @@ case 2: goto st2; goto st0; tr2: -#line 312 "parser.rl" +#line 316 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v); @@ -1079,7 +1083,7 @@ st3: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1083 "parser.c" +#line 1087 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -1179,14 +1183,14 @@ case 12: goto st3; goto st12; tr4: -#line 323 "parser.rl" +#line 327 "parser.rl" { p--; {p++; cs = 17; goto _out;} } goto st17; st17: if ( ++p == pe ) goto _test_eof17; case 17: -#line 1190 "parser.c" +#line 1194 "parser.c" goto st0; st13: if ( ++p == pe ) @@ -1242,7 +1246,7 @@ case 16: _out: {} } -#line 345 "parser.rl" +#line 349 "parser.rl" if(cs >= JSON_array_first_final) { return p + 1; @@ -1308,7 
+1312,7 @@ static VALUE json_string_unescape(char *p, char *pe) } -#line 1312 "parser.c" +#line 1316 "parser.c" static const int JSON_string_start = 1; static const int JSON_string_first_final = 8; static const int JSON_string_error = 0; @@ -1316,7 +1320,7 @@ static const int JSON_string_error = 0; static const int JSON_string_en_main = 1; -#line 429 "parser.rl" +#line 433 "parser.rl" static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -1325,15 +1329,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu *result = rb_str_new("", 0); -#line 1329 "parser.c" +#line 1333 "parser.c" { cs = JSON_string_start; } -#line 437 "parser.rl" +#line 441 "parser.rl" json->memo = p; -#line 1337 "parser.c" +#line 1341 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1358,7 +1362,7 @@ case 2: goto st0; goto st2; tr2: -#line 415 "parser.rl" +#line 419 "parser.rl" { *result = json_string_unescape(json->memo + 1, p); if (NIL_P(*result)) { @@ -1369,14 +1373,14 @@ tr2: {p = (( p + 1))-1;} } } -#line 426 "parser.rl" +#line 430 "parser.rl" { p--; {p++; cs = 8; goto _out;} } goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: -#line 1380 "parser.c" +#line 1384 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -1452,7 +1456,7 @@ case 7: _out: {} } -#line 439 "parser.rl" +#line 443 "parser.rl" if (cs >= JSON_string_first_final) { return p + 1; @@ -1463,7 +1467,7 @@ case 7: -#line 1467 "parser.c" +#line 1471 "parser.c" static const int JSON_start = 1; static const int JSON_first_final = 10; static const int JSON_error = 0; @@ -1471,7 +1475,7 @@ static const int JSON_error = 0; static const int JSON_en_main = 1; -#line 473 "parser.rl" +#line 477 "parser.rl" /* @@ -1486,6 +1490,54 @@ static const int JSON_en_main = 1; * */ +inline static VALUE convert_encoding(VALUE source) +{ + char *ptr = RSTRING_PTR(source); + long len = RSTRING_LEN(source); + if (len < 2) { + rb_raise(eParserError, "A JSON text must at least contain two 
octets!"); + } +#ifdef HAVE_RUBY_ENCODING_H + { + VALUE encoding = rb_funcall(source, i_encoding, 0); + if (encoding == mEncoding_ASCII_8BIT) { + if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { + source = rb_str_dup(source); + rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_32BE); + source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8); + } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { + source = rb_str_dup(source); + rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_16BE); + source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8); + } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { + source = rb_str_dup(source); + rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_32LE); + source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8); + } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { + source = rb_str_dup(source); + rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_16LE); + source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8); + } else { + source = rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_8); + } + } else { + source = rb_funcall(source, i_encode, 1, mEncoding_UTF_8); + } + } +#else + if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { + source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32be"), source); + } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { + source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16be"), source); + } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { + source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32le"), source); + } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { + source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16le"), source); + } +#endif + return source; +} + /* * call-seq: new(source, opts => {}) * @@ -1516,12 +1568,9 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) VALUE 
source, opts; GET_STRUCT; rb_scan_args(argc, argv, "11", &source, &opts); - source = StringValue(source); + source = convert_encoding(StringValue(source)); ptr = RSTRING_PTR(source); len = RSTRING_LEN(source); - if (len < 2) { - rb_raise(eParserError, "A JSON text must at least contain two octets!"); - } if (!NIL_P(opts)) { opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash"); if (NIL_P(opts)) { @@ -1578,18 +1627,6 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) json->array_class = Qnil; } json->current_nesting = 0; - /* - Convert these? - if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { - rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); - } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { - rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); - } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { - rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); - } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { - rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); - } - */ json->len = len; json->source = ptr; json->Vsource = source; @@ -1610,16 +1647,16 @@ static VALUE cParser_parse(VALUE self) GET_STRUCT; -#line 1614 "parser.c" +#line 1651 "parser.c" { cs = JSON_start; } -#line 611 "parser.rl" +#line 648 "parser.rl" p = json->source; pe = p + json->len; -#line 1623 "parser.c" +#line 1660 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1675,7 +1712,7 @@ case 5: goto st1; goto st5; tr3: -#line 462 "parser.rl" +#line 466 "parser.rl" { char *np; json->current_nesting = 1; @@ -1684,7 +1721,7 @@ tr3: } goto st10; tr4: -#line 455 "parser.rl" +#line 459 "parser.rl" { char *np; json->current_nesting = 1; @@ -1696,7 +1733,7 @@ st10: if ( ++p == pe ) goto _test_eof10; case 10: -#line 1700 "parser.c" +#line 1737 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -1753,7 +1790,7 @@ case 9: _out: {} } -#line 614 "parser.rl" +#line 651 
"parser.rl" if (cs >= JSON_first_final && p == pe) { return result; @@ -1826,4 +1863,18 @@ void Init_parser() i_allow_nan = rb_intern("allow_nan"); i_object_class = rb_intern("object_class"); i_array_class = rb_intern("array_class"); +#ifdef HAVE_RUBY_ENCODING_H + mEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8")); + mEncoding_UTF_16BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16be")); + mEncoding_UTF_16LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16le")); + mEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be")); + mEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le")); + mEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit")); + i_encoding = rb_intern("encoding"); + i_encode = rb_intern("encode"); + i_encode_bang = rb_intern("encode!"); + i_force_encoding = rb_intern("force_encoding"); +#else + i_iconv = rb_intern("iconv"); +#endif } diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 384ab9f..7de7bb1 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -19,15 +19,19 @@ #ifdef HAVE_RUBY_ENCODING_H #include "ruby/encoding.h" #define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding()) +static VALUE mEncoding_ASCII_8BIT, mEncoding_UTF_8, mEncoding_UTF_16BE, + mEncoding_UTF_16LE, mEncoding_UTF_32BE, mEncoding_UTF_32LE; +static ID i_encoding, i_encode, i_encode_bang, i_force_encoding; #else #define FORCE_UTF8(obj) +static ID i_iconv; #endif static VALUE mJSON, mExt, cParser, eParserError, eNestingError; static VALUE CNaN, CInfinity, CMinusInfinity; static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, - i_chr, i_max_nesting, i_allow_nan, i_object_class, i_array_class; + i_chr, i_max_nesting, i_allow_nan, 
i_object_class, i_array_class; #define MinusInfinity "-Infinity" @@ -484,6 +488,54 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu * */ +inline static VALUE convert_encoding(VALUE source) +{ + char *ptr = RSTRING_PTR(source); + long len = RSTRING_LEN(source); + if (len < 2) { + rb_raise(eParserError, "A JSON text must at least contain two octets!"); + } +#ifdef HAVE_RUBY_ENCODING_H + { + VALUE encoding = rb_funcall(source, i_encoding, 0); + if (encoding == mEncoding_ASCII_8BIT) { + if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { + source = rb_str_dup(source); + rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_32BE); + source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8); + } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { + source = rb_str_dup(source); + rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_16BE); + source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8); + } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { + source = rb_str_dup(source); + rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_32LE); + source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8); + } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { + source = rb_str_dup(source); + rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_16LE); + source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8); + } else { + source = rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_8); + } + } else { + source = rb_funcall(source, i_encode, 1, mEncoding_UTF_8); + } + } +#else + if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { + source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32be"), source); + } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { + source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16be"), source); + } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { + source = rb_funcall(mJSON, i_iconv, 3, 
rb_str_new2("utf-8"), rb_str_new2("utf-32le"), source); + } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { + source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16le"), source); + } +#endif + return source; +} + /* * call-seq: new(source, opts => {}) * @@ -514,12 +566,9 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) VALUE source, opts; GET_STRUCT; rb_scan_args(argc, argv, "11", &source, &opts); - source = StringValue(source); + source = convert_encoding(StringValue(source)); ptr = RSTRING_PTR(source); len = RSTRING_LEN(source); - if (len < 2) { - rb_raise(eParserError, "A JSON text must at least contain two octets!"); - } if (!NIL_P(opts)) { opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash"); if (NIL_P(opts)) { @@ -576,18 +625,6 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) json->array_class = Qnil; } json->current_nesting = 0; - /* - Convert these? - if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { - rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); - } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { - rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); - } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { - rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); - } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { - rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); - } - */ json->len = len; json->source = ptr; json->Vsource = source; @@ -683,4 +720,18 @@ void Init_parser() i_allow_nan = rb_intern("allow_nan"); i_object_class = rb_intern("object_class"); i_array_class = rb_intern("array_class"); +#ifdef HAVE_RUBY_ENCODING_H + mEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8")); + mEncoding_UTF_16BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16be")); + mEncoding_UTF_16LE = 
rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16le")); + mEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be")); + mEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le")); + mEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit")); + i_encoding = rb_intern("encoding"); + i_encode = rb_intern("encode"); + i_encode_bang = rb_intern("encode!"); + i_force_encoding = rb_intern("force_encoding"); +#else + i_iconv = rb_intern("iconv"); +#endif } diff --git a/lib/json/common.rb b/lib/json/common.rb index c7808fb..39f6336 100644 --- a/lib/json/common.rb +++ b/lib/json/common.rb @@ -1,4 +1,5 @@ require 'json/version' +require 'iconv' module JSON class << self @@ -316,6 +317,11 @@ module JSON rescue JSON::NestingError raise ArgumentError, "exceed depth limit" end + + # Shortuct for iconv. + def self.iconv(to, from, string) + Iconv.iconv(to, from, string).first + end end module ::Kernel diff --git a/lib/json/pure/parser.rb b/lib/json/pure/parser.rb index 7e8fe08..7a09f2f 100644 --- a/lib/json/pure/parser.rb +++ b/lib/json/pure/parser.rb @@ -66,7 +66,41 @@ module JSON # * *object_class*: Defaults to Hash # * *array_class*: Defaults to Array def initialize(source, opts = {}) - super + if defined?(::Encoding) + if source.encoding == Encoding::ASCII_8BIT + b = source[0, 4].bytes.to_a + source = case + when b.size >= 4 && b[0] == 0 && b[1] == 0 && b[2] == 0 + source.dup.force_encoding(Encoding::UTF_32BE).encode!(Encoding::UTF_8) + when b.size >= 4 && b[0] == 0 && b[2] == 0 + source.dup.force_encoding(Encoding::UTF_16BE).encode!(Encoding::UTF_8) + when b.size >= 4 && b[1] == 0 && b[2] == 0 && b[3] == 0 + source.dup.force_encoding(Encoding::UTF_32LE).encode!(Encoding::UTF_8) + when b.size >= 4 && b[1] == 0 && b[3] == 0 + source.dup.force_encoding(Encoding::UTF_16LE).encode!(Encoding::UTF_8) + else + 
source.dup + end + else + source = source.encode(Encoding::UTF_8) + end + source.force_encoding(Encoding::ASCII_8BIT) + else + b = source + source = case + when b.size >= 4 && b[0] == 0 && b[1] == 0 && b[2] == 0 + JSON.iconv('utf-8', 'utf-32be', b) + when b.size >= 4 && b[0] == 0 && b[2] == 0 + JSON.iconv('utf-8', 'utf-16be', b) + when b.size >= 4 && b[1] == 0 && b[2] == 0 && b[3] == 0 + JSON.iconv('utf-8', 'utf-32le', b) + when b.size >= 4 && b[1] == 0 && b[3] == 0 + JSON.iconv('utf-8', 'utf-16le', b) + else + b + end + end + super source if !opts.key?(:max_nesting) # defaults to 19 @max_nesting = 19 elsif opts[:max_nesting] diff --git a/tests/test_json_encoding.rb b/tests/test_json_encoding.rb new file mode 100644 index 0000000..f46b476 --- /dev/null +++ b/tests/test_json_encoding.rb @@ -0,0 +1,57 @@ +#!/usr/bin/env ruby +# -*- coding: utf-8 -*- + +require 'test/unit' +case ENV['JSON'] +when 'pure' then require 'json/pure' +when 'ext' then require 'json/ext' +else require 'json' +end +require 'iconv' + +class TC_JSONEncoding < Test::Unit::TestCase + include JSON + + def setup + @utf_8 = '["© ≠ €!"]' + @decoded = [ "© ≠ €!" 
] + if defined?(::Encoding) + @utf_8_ascii_8bit = @utf_8.dup.force_encoding(Encoding::ASCII_8BIT) + @utf_16be, = Iconv.iconv('utf-16be', 'utf-8', @utf_8) + @utf_16be_ascii_8bit = @utf_16be.dup.force_encoding(Encoding::ASCII_8BIT) + @utf_16le, = Iconv.iconv('utf-16le', 'utf-8', @utf_8) + @utf_16le_ascii_8bit = @utf_16le.dup.force_encoding(Encoding::ASCII_8BIT) + @utf_32be, = Iconv.iconv('utf-32be', 'utf-8', @utf_8) + @utf_32be_ascii_8bit = @utf_32be.dup.force_encoding(Encoding::ASCII_8BIT) + @utf_32le, = Iconv.iconv('utf-32le', 'utf-8', @utf_8) + @utf_32le_ascii_8bit = @utf_32le.dup.force_encoding(Encoding::ASCII_8BIT) + else + @utf_8_ascii_8bit = @utf_8.dup + @utf_16be, = Iconv.iconv('utf-16be', 'utf-8', @utf_8) + @utf_16be_ascii_8bit = @utf_16be.dup + @utf_16le, = Iconv.iconv('utf-16le', 'utf-8', @utf_8) + @utf_16le_ascii_8bit = @utf_16le.dup + @utf_32be, = Iconv.iconv('utf-32be', 'utf-8', @utf_8) + @utf_32be_ascii_8bit = @utf_32be.dup + @utf_32le, = Iconv.iconv('utf-32le', 'utf-8', @utf_8) + @utf_32le_ascii_8bit = @utf_32le.dup + end + end + + def test_decode + assert @decoded, JSON.parse(@utf_8) + assert @decoded, JSON.parse(@utf_16be) + assert @decoded, JSON.parse(@utf_16le) + assert @decoded, JSON.parse(@utf_32be) + assert @decoded, JSON.parse(@utf_32le) + end + + def test_decode_ascii_8bit + assert @decoded, JSON.parse(@utf_8_ascii_8bit) + assert @decoded, JSON.parse(@utf_16be_ascii_8bit) + assert @decoded, JSON.parse(@utf_16le_ascii_8bit) + assert @decoded, JSON.parse(@utf_32be_ascii_8bit) + assert @decoded, JSON.parse(@utf_32le_ascii_8bit) + end + +end -- cgit v1.2.1