From 7f6b0e3766a83a05aa7bf8351be5e3b478d5b7a6 Mon Sep 17 00:00:00 2001 From: Florian Frank Date: Fri, 3 Jun 2016 13:39:17 +0200 Subject: Force ASCII 8bit to be UTF-8 and hope for the best --- ext/json/ext/parser/parser.c | 162 ++++++++++++++++++++++-------------------- ext/json/ext/parser/parser.rl | 16 +++-- 2 files changed, 93 insertions(+), 85 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index c63a462..5b2e61c 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -68,9 +68,8 @@ static int convert_UTF32_to_UTF8(char *buf, UTF32 ch) } #ifdef HAVE_RUBY_ENCODING_H -static VALUE CEncoding_UTF_8; - -static ID i_encode; +static VALUE CEncoding_ASCII_8BIT, CEncoding_UTF_8; +static ID i_encoding, i_encode; #else static ID i_iconv; #endif @@ -84,11 +83,11 @@ static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, i_match_string, i_aset, i_aref, i_leftshift; -#line 110 "parser.rl" +#line 109 "parser.rl" -#line 92 "parser.c" +#line 91 "parser.c" static const int JSON_object_start = 1; static const int JSON_object_first_final = 27; static const int JSON_object_error = 0; @@ -96,7 +95,7 @@ static const int JSON_object_error = 0; static const int JSON_object_en_main = 1; -#line 151 "parser.rl" +#line 150 "parser.rl" static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -112,14 +111,14 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class); -#line 116 "parser.c" +#line 115 "parser.c" { cs = JSON_object_start; } -#line 166 "parser.rl" +#line 165 "parser.rl" -#line 123 "parser.c" +#line 122 "parser.c" { if ( p == pe ) goto _test_eof; @@ -147,7 +146,7 @@ case 2: goto st2; goto st0; tr2: -#line 133 "parser.rl" +#line 132 "parser.rl" { char *np; json->parsing_name = 1; @@ -160,7 +159,7 @@ st3: if ( ++p == pe ) goto _test_eof3; case 3: -#line 164 "parser.c" +#line 163 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -227,7 +226,7 @@ case 8: goto st8; goto st0; tr11: -#line 118 "parser.rl" +#line 117 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v); @@ -247,7 +246,7 @@ st9: if ( ++p == pe ) goto _test_eof9; case 9: -#line 251 "parser.c" +#line 250 "parser.c" switch( (*p) ) { case 13: goto st9; case 32: goto st9; @@ -336,14 +335,14 @@ case 18: goto st9; goto st18; tr4: -#line 141 "parser.rl" +#line 140 "parser.rl" { p--; {p++; cs = 27; goto _out;} } goto st27; st27: if ( ++p == pe ) goto _test_eof27; case 27: -#line 347 "parser.c" +#line 346 "parser.c" goto st0; st19: if ( ++p == pe ) @@ -441,7 +440,7 @@ case 26: _out: {} } -#line 167 "parser.rl" +#line 166 "parser.rl" if (cs >= JSON_object_first_final) { if (json->create_additions) { @@ -466,7 +465,7 @@ case 26: -#line 470 "parser.c" +#line 469 "parser.c" static const int JSON_value_start = 1; static const int JSON_value_first_final = 29; static const int JSON_value_error = 0; @@ -474,7 +473,7 @@ static const int JSON_value_error = 0; static const int JSON_value_en_main = 1; -#line 271 "parser.rl" +#line 270 "parser.rl" static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -482,14 +481,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 486 "parser.c" +#line 485 "parser.c" { cs = JSON_value_start; } -#line 278 "parser.rl" +#line 277 "parser.rl" -#line 493 "parser.c" +#line 492 "parser.c" { if ( p == pe ) goto _test_eof; @@ -523,14 +522,14 @@ st0: cs = 0; goto _out; tr2: -#line 219 "parser.rl" +#line 218 "parser.rl" { char *np = JSON_parse_string(json, p, pe, result); if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} } goto st29; tr3: -#line 224 "parser.rl" +#line 223 "parser.rl" { char *np; if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { @@ -550,7 +549,7 @@ tr3: } goto st29; tr7: -#line 242 "parser.rl" +#line 241 "parser.rl" { char *np; json->current_nesting++; @@ -560,7 +559,7 @@ tr7: } goto st29; tr11: -#line 250 "parser.rl" +#line 249 "parser.rl" { char *np; json->current_nesting++; @@ -570,7 +569,7 @@ tr11: } goto st29; tr25: -#line 212 "parser.rl" +#line 211 "parser.rl" { if (json->allow_nan) { *result = CInfinity; @@ -580,7 +579,7 @@ tr25: } goto st29; tr27: -#line 205 "parser.rl" +#line 204 "parser.rl" { if (json->allow_nan) { *result = CNaN; @@ -590,19 +589,19 @@ tr27: } goto st29; tr31: -#line 199 "parser.rl" +#line 198 "parser.rl" { *result = Qfalse; } goto st29; tr34: -#line 196 "parser.rl" +#line 195 "parser.rl" { *result = Qnil; } goto st29; tr37: -#line 202 "parser.rl" +#line 201 "parser.rl" { *result = Qtrue; } @@ -611,9 +610,9 @@ st29: if ( ++p == pe ) goto _test_eof29; case 29: -#line 258 "parser.rl" +#line 257 "parser.rl" { p--; {p++; cs = 29; goto _out;} } -#line 617 "parser.c" +#line 616 "parser.c" switch( (*p) ) { case 13: goto st29; case 32: goto st29; @@ -854,7 +853,7 @@ case 28: _out: {} } -#line 279 "parser.rl" +#line 278 "parser.rl" if (cs >= JSON_value_first_final) { return p; @@ -864,7 +863,7 @@ case 28: } -#line 868 "parser.c" +#line 867 "parser.c" static const int JSON_integer_start = 1; static const int JSON_integer_first_final = 3; static const int JSON_integer_error = 0; @@ -872,7 +871,7 @@ static const int JSON_integer_error = 0; static const int JSON_integer_en_main = 1; -#line 295 "parser.rl" +#line 294 "parser.rl" static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -880,15 +879,15 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res int cs = EVIL; -#line 884 "parser.c" +#line 883 "parser.c" { cs = JSON_integer_start; } -#line 302 "parser.rl" +#line 301 "parser.rl" json->memo = p; -#line 892 "parser.c" +#line 891 "parser.c" { if ( p == pe ) goto _test_eof; @@ -922,14 +921,14 @@ case 3: goto st0; goto tr4; tr4: -#line 292 "parser.rl" +#line 291 "parser.rl" { p--; {p++; cs = 4; goto _out;} } goto st4; st4: if ( ++p == pe ) goto _test_eof4; case 4: -#line 933 "parser.c" +#line 932 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -948,7 +947,7 @@ case 5: _out: {} } -#line 304 "parser.rl" +#line 303 "parser.rl" if (cs >= JSON_integer_first_final) { long len = p - json->memo; @@ -963,7 +962,7 @@ case 5: } -#line 967 "parser.c" +#line 966 "parser.c" static const int JSON_float_start = 1; static const int JSON_float_first_final = 8; static const int JSON_float_error = 0; @@ -971,7 +970,7 @@ static const int JSON_float_error = 0; static const int JSON_float_en_main = 1; -#line 329 "parser.rl" +#line 328 "parser.rl" static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -979,15 +978,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 983 "parser.c" +#line 982 "parser.c" { cs = JSON_float_start; } -#line 336 "parser.rl" +#line 335 "parser.rl" json->memo = p; -#line 991 "parser.c" +#line 990 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1045,14 +1044,14 @@ case 8: goto st0; goto tr9; tr9: -#line 323 "parser.rl" +#line 322 "parser.rl" { p--; {p++; cs = 9; goto _out;} } goto st9; st9: if ( ++p == pe ) goto _test_eof9; case 9: -#line 1056 "parser.c" +#line 1055 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -1113,7 +1112,7 @@ case 7: _out: {} } -#line 338 "parser.rl" +#line 337 "parser.rl" if (cs >= JSON_float_first_final) { long len = p - json->memo; @@ -1129,7 +1128,7 @@ case 7: -#line 1133 "parser.c" +#line 1132 "parser.c" static const int JSON_array_start = 1; static const int JSON_array_first_final = 17; static const int JSON_array_error = 0; @@ -1137,7 +1136,7 @@ static const int JSON_array_error = 0; static const int JSON_array_en_main = 1; -#line 381 "parser.rl" +#line 380 "parser.rl" static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -1151,14 +1150,14 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class); -#line 1155 "parser.c" +#line 1154 "parser.c" { cs = JSON_array_start; } -#line 394 "parser.rl" +#line 393 "parser.rl" -#line 1162 "parser.c" +#line 1161 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1197,7 +1196,7 @@ case 2: goto st2; goto st0; tr2: -#line 358 "parser.rl" +#line 357 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v); @@ -1217,7 +1216,7 @@ st3: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1221 "parser.c" +#line 1220 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -1317,14 +1316,14 @@ case 12: goto st3; goto st12; tr4: -#line 373 "parser.rl" +#line 372 "parser.rl" { p--; {p++; cs = 17; goto _out;} } goto st17; st17: if ( ++p == pe ) goto _test_eof17; case 17: -#line 1328 "parser.c" +#line 1327 "parser.c" goto st0; st13: if ( ++p == pe ) @@ -1380,7 +1379,7 @@ case 16: _out: {} } -#line 395 "parser.rl" +#line 394 "parser.rl" if(cs >= JSON_array_first_final) { return p + 1; @@ -1461,7 +1460,7 @@ static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd) } -#line 1465 "parser.c" +#line 1464 "parser.c" static const int JSON_string_start = 1; static const int JSON_string_first_final = 8; static const int JSON_string_error = 0; @@ -1469,7 +1468,7 @@ static const int JSON_string_error = 0; static const int JSON_string_en_main = 1; -#line 494 "parser.rl" +#line 493 "parser.rl" static int @@ -1491,15 +1490,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu *result = rb_str_buf_new(0); -#line 1495 "parser.c" +#line 1494 "parser.c" { cs = JSON_string_start; } -#line 515 "parser.rl" +#line 514 "parser.rl" json->memo = p; -#line 1503 "parser.c" +#line 1502 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1524,7 +1523,7 @@ case 2: goto st0; goto st2; tr2: -#line 480 "parser.rl" +#line 479 "parser.rl" { *result = json_string_unescape(*result, json->memo + 1, p); if (NIL_P(*result)) { @@ -1535,14 +1534,14 @@ tr2: {p = (( p + 1))-1;} } } -#line 491 "parser.rl" +#line 490 "parser.rl" { p--; {p++; cs = 8; goto _out;} } goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: -#line 1546 "parser.c" +#line 1545 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -1618,7 +1617,7 @@ case 7: _out: {} } -#line 517 "parser.rl" +#line 516 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -1656,9 +1655,12 @@ case 7: static VALUE convert_encoding(VALUE source) { #ifdef HAVE_RUBY_ENCODING_H - { - source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8); - } + VALUE encoding = rb_funcall(source, i_encoding, 0); + if (encoding == CEncoding_ASCII_8BIT) { + FORCE_UTF8(source); + } else { + source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8); + } #endif return source; } @@ -1784,7 +1786,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 1788 "parser.c" +#line 1790 "parser.c" static const int JSON_start = 1; static const int JSON_first_final = 10; static const int JSON_error = 0; @@ -1792,7 +1794,7 @@ static const int JSON_error = 0; static const int JSON_en_main = 1; -#line 696 "parser.rl" +#line 698 "parser.rl" /* @@ -1809,16 +1811,16 @@ static VALUE cParser_parse(VALUE self) GET_PARSER; -#line 1813 "parser.c" +#line 1815 "parser.c" { cs = JSON_start; } -#line 712 "parser.rl" +#line 714 "parser.rl" p = json->source; pe = p + json->len; -#line 1822 "parser.c" +#line 1824 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1852,7 +1854,7 @@ st0: cs = 0; goto _out; tr2: -#line 688 "parser.rl" +#line 690 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -1862,7 +1864,7 @@ st10: if ( ++p == pe ) goto _test_eof10; case 10: -#line 1866 "parser.c" +#line 1868 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -1951,7 +1953,7 @@ case 9: _out: {} } -#line 715 "parser.rl" +#line 717 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; @@ -2051,6 +2053,8 @@ void Init_parser(void) i_leftshift = rb_intern("<<"); #ifdef HAVE_RUBY_ENCODING_H CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8")); + CEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit")); + i_encoding = rb_intern("encoding"); i_encode = rb_intern("encode"); #else i_iconv = rb_intern("iconv"); diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 6f73307..f3933cb 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -66,9 +66,8 @@ static int convert_UTF32_to_UTF8(char *buf, UTF32 ch) } #ifdef HAVE_RUBY_ENCODING_H -static VALUE CEncoding_UTF_8; - -static ID i_encode; +static VALUE CEncoding_ASCII_8BIT, CEncoding_UTF_8; +static ID i_encoding, i_encode; #else static ID i_iconv; #endif @@ -551,9 +550,12 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu static VALUE convert_encoding(VALUE source) { #ifdef HAVE_RUBY_ENCODING_H - { - source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8); - } + VALUE encoding = rb_funcall(source, i_encoding, 0); + if (encoding == CEncoding_ASCII_8BIT) { + FORCE_UTF8(source); + } else { + source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8); + } #endif return source; } @@ -811,6 +813,8 @@ void Init_parser(void) i_leftshift = rb_intern("<<"); #ifdef HAVE_RUBY_ENCODING_H CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8")); + CEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit")); + i_encoding = rb_intern("encoding"); i_encode = rb_intern("encode"); #else i_iconv = rb_intern("iconv"); -- cgit v1.2.1