From 91a24ecac3d722bc3e545fdd0657ed225e91c2df Mon Sep 17 00:00:00 2001 From: Watson Date: Sun, 4 Mar 2018 23:34:26 +0900 Subject: Does not check whether illegal utf-8 if string has ascii only. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Before ``` $ ruby bench_json_generate.rb Warming up -------------------------------------- json 25.000 i/100ms Calculating ------------------------------------- json 250.478 (± 4.8%) i/s - 1.250k in 5.002238s ``` ## After ``` $ ruby bench_json_generate.rb Warming up -------------------------------------- json 32.000 i/100ms Calculating ------------------------------------- json 360.652 (± 3.6%) i/s - 1.824k in 5.064511s ``` ## Test code ``` require 'json' require 'benchmark/ips' obj = [] 1000.times do |i| obj << { :string => "x" * 100, :utf8 => "あ" * 100 } end Benchmark.ips do |x| x.report "json" do |iter| count = 0 while count < iter JSON.generate(obj) count += 1 end end end ``` --- ext/json/ext/generator/generator.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index cebe7e9..596f33f 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -237,6 +237,7 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string) int escape_len; unsigned char c; char buf[6] = { '\\', 'u' }; + int ascii_only = rb_enc_str_asciionly_p(string); for (start = 0, end = 0; end < len;) { p = ptr + end; @@ -281,14 +282,17 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string) break; default: { - unsigned short clen = trailingBytesForUTF8[c] + 1; - if (end + clen > len) { - rb_raise(rb_path2class("JSON::GeneratorError"), - "partial character in source, but hit end"); - } - if (!isLegalUTF8((UTF8 *) p, clen)) { - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed utf-8"); + unsigned short clen = 1; + if (!ascii_only) { + clen += trailingBytesForUTF8[c]; + if (end + clen > len) { + rb_raise(rb_path2class("JSON::GeneratorError"), + "partial character in source, but hit end"); + } + if (!isLegalUTF8((UTF8 *) p, clen)) { + rb_raise(rb_path2class("JSON::GeneratorError"), + "source sequence is illegal/malformed utf-8"); + } } end += clen; } -- cgit v1.2.1