diff options
author | Scott MacVicar <scottmac@php.net> | 2008-12-17 14:40:06 +0000 |
---|---|---|
committer | Scott MacVicar <scottmac@php.net> | 2008-12-17 14:40:06 +0000 |
commit | 8ef7fe1d996c702d37d21cbea0e2a28b1013ed4a (patch) | |
tree | 477a5a1126a1ebf08b25cdd11890f8cdec3700f1 /ext/json | |
parent | 77a63540a5f7f54b1741152f316fd59caa4d4989 (diff) | |
download | php-git-8ef7fe1d996c702d37d21cbea0e2a28b1013ed4a.tar.gz |
Update the JSON parser to match the one on json.org; the biggest change here is code readability, with fewer magic numbers in the state table.
Add missing reflection information to json_encode()
Fixes bug #45791, where 0e0 was not supported as a value
Error values are stored when encountered during parsing
Diffstat (limited to 'ext/json')
-rw-r--r-- | ext/json/JSON_parser.c | 820 | ||||
-rw-r--r-- | ext/json/JSON_parser.h | 22 | ||||
-rw-r--r-- | ext/json/json.c | 41 |
3 files changed, 422 insertions, 461 deletions
diff --git a/ext/json/JSON_parser.c b/ext/json/JSON_parser.c index a90ade02de..5174fb756e 100644 --- a/ext/json/JSON_parser.c +++ b/ext/json/JSON_parser.c @@ -26,216 +26,191 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#include "JSON_parser.h" #include <stdio.h> +#include "JSON_parser.h" #define true 1 #define false 0 +#define __ -1 /* the universal error code */ /* - Characters are mapped into these 32 symbol classes. This allows for - significant reductions in the size of the state transition table. + Characters are mapped into these 31 character classes. This allows for + a significant reduction in the size of the state transition table. */ -/* error */ -#define S_ERR -1 - -/* space */ -#define S_SPA 0 - -/* other whitespace */ -#define S_WSP 1 - -/* { */ -#define S_LBE 2 - -/* } */ -#define S_RBE 3 - -/* [ */ -#define S_LBT 4 - -/* ] */ -#define S_RBT 5 - -/* : */ -#define S_COL 6 - -/* , */ -#define S_COM 7 - -/* " */ -#define S_QUO 8 - -/* \ */ -#define S_BAC 9 - -/* / */ -#define S_SLA 10 - -/* + */ -#define S_PLU 11 - -/* - */ -#define S_MIN 12 - -/* . */ -#define S_DOT 13 - -/* 0 */ -#define S_ZER 14 - -/* 123456789 */ -#define S_DIG 15 - -/* a */ -#define S__A_ 16 - -/* b */ -#define S__B_ 17 - -/* c */ -#define S__C_ 18 - -/* d */ -#define S__D_ 19 - -/* e */ -#define S__E_ 20 - -/* f */ -#define S__F_ 21 - -/* l */ -#define S__L_ 22 - -/* n */ -#define S__N_ 23 - -/* r */ -#define S__R_ 24 - -/* s */ -#define S__S_ 25 - -/* t */ -#define S__T_ 26 - -/* u */ -#define S__U_ 27 - -/* ABCDF */ -#define S_A_F 28 - -/* E */ -#define S_E 29 +enum classes { + C_SPACE, /* space */ + C_WHITE, /* other whitespace */ + C_LCURB, /* { */ + C_RCURB, /* } */ + C_LSQRB, /* [ */ + C_RSQRB, /* ] */ + C_COLON, /* : */ + C_COMMA, /* , */ + C_QUOTE, /* " */ + C_BACKS, /* \ */ + C_SLASH, /* / */ + C_PLUS, /* + */ + C_MINUS, /* - */ + C_POINT, /* . 
*/ + C_ZERO , /* 0 */ + C_DIGIT, /* 123456789 */ + C_LOW_A, /* a */ + C_LOW_B, /* b */ + C_LOW_C, /* c */ + C_LOW_D, /* d */ + C_LOW_E, /* e */ + C_LOW_F, /* f */ + C_LOW_L, /* l */ + C_LOW_N, /* n */ + C_LOW_R, /* r */ + C_LOW_S, /* s */ + C_LOW_T, /* t */ + C_LOW_U, /* u */ + C_ABCDF, /* ABCDF */ + C_E, /* E */ + C_ETC, /* everything else */ + NR_CLASSES +}; -/* everything else */ -#define S_ETC 30 +static const int ascii_class[128] = { +/* + This array maps the 128 ASCII characters into character classes. + The remaining Unicode characters should be mapped to C_ETC. + Non-whitespace control characters are errors. +*/ + __, __, __, __, __, __, __, __, + __, C_WHITE, C_WHITE, __, __, C_WHITE, __, __, + __, __, __, __, __, __, __, __, + __, __, __, __, __, __, __, __, + + C_SPACE, C_ETC, C_QUOTE, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, + C_ETC, C_ETC, C_ETC, C_PLUS, C_COMMA, C_MINUS, C_POINT, C_SLASH, + C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, + C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, + + C_ETC, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E, C_ABCDF, C_ETC, + C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, + C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, + C_ETC, C_ETC, C_ETC, C_LSQRB, C_BACKS, C_RSQRB, C_ETC, C_ETC, + + C_ETC, C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC, + C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_L, C_ETC, C_LOW_N, C_ETC, + C_ETC, C_ETC, C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC, C_ETC, + C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB, C_ETC, C_ETC +}; /* - This table maps the 128 ASCII characters into the 32 character classes. - The remaining Unicode characters should be mapped to S_ETC. + The state codes. 
*/ -static const int ascii_class[128] = { - S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, - S_ERR, S_WSP, S_WSP, S_ERR, S_ERR, S_WSP, S_ERR, S_ERR, - S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, - S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, - - S_SPA, S_ETC, S_QUO, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, - S_ETC, S_ETC, S_ETC, S_PLU, S_COM, S_MIN, S_DOT, S_SLA, - S_ZER, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, - S_DIG, S_DIG, S_COL, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, - - S_ETC, S_A_F, S_A_F, S_A_F, S_A_F, S_E , S_A_F, S_ETC, - S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, - S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, - S_ETC, S_ETC, S_ETC, S_LBT, S_BAC, S_RBT, S_ETC, S_ETC, - - S_ETC, S__A_, S__B_, S__C_, S__D_, S__E_, S__F_, S_ETC, - S_ETC, S_ETC, S_ETC, S_ETC, S__L_, S_ETC, S__N_, S_ETC, - S_ETC, S_ETC, S__R_, S__S_, S__T_, S__U_, S_ETC, S_ETC, - S_ETC, S_ETC, S_ETC, S_LBE, S_ETC, S_RBE, S_ETC, S_ETC +enum states { + GO, /* start */ + OK, /* ok */ + OB, /* object */ + KE, /* key */ + CO, /* colon */ + VA, /* value */ + AR, /* array */ + ST, /* string */ + ES, /* escape */ + U1, /* u1 */ + U2, /* u2 */ + U3, /* u3 */ + U4, /* u4 */ + MI, /* minus */ + ZE, /* zero */ + IN, /* integer */ + FR, /* fraction */ + E1, /* e */ + E2, /* ex */ + E3, /* exp */ + T1, /* tr */ + T2, /* tru */ + T3, /* true */ + F1, /* fa */ + F2, /* fal */ + F3, /* fals */ + F4, /* false */ + N1, /* nu */ + N2, /* nul */ + N3, /* null */ + NR_STATES }; +static const int state_transition_table[NR_STATES][NR_CLASSES] = { /* The state transition table takes the current state and the current symbol, - and returns either a new state or an action. A new state is a number between - 0 and 29. An action is a negative number between -1 and -9. A JSON text is - accepted if the end of the text is in state 9 and mode is MODE_DONE. 
-*/ -static const int state_transition_table[30][31] = { -/* 0*/ { 0, 0,-8,-1,-6,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, -/* 1*/ { 1, 1,-1,-9,-1,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, -/* 2*/ { 2, 2,-8,-1,-6,-5,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1}, -/* 3*/ { 3,-1, 3, 3, 3, 3, 3, 3,-4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}, -/* 4*/ {-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3,-1,-1,-1, 3,-1, 3, 3,-1, 3, 5,-1,-1,-1}, -/* 5*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 6, 6, 6, 6, 6, 6, 6, 6,-1,-1,-1,-1,-1,-1, 6, 6,-1}, -/* 6*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 7, 7, 7, 7, 7, 7, 7, 7,-1,-1,-1,-1,-1,-1, 7, 7,-1}, -/* 7*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 8, 8, 8, 8, 8, 8, 8, 8,-1,-1,-1,-1,-1,-1, 8, 8,-1}, -/* 8*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3, 3, 3, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3, 3,-1}, -/* 9*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, -/*10*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,11,-1,-1,-1,-1,-1,-1}, -/*11*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,12,-1,-1,-1}, -/*12*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, -/*13*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,14,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, -/*14*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,15,-1,-1,-1,-1,-1,-1,-1,-1}, -/*15*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,16,-1,-1,-1,-1,-1}, -/*16*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, -/*17*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,18,-1,-1,-1}, -/*18*/ 
{-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,19,-1,-1,-1,-1,-1,-1,-1,-1}, -/*19*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1}, -/*20*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,21,22,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, -/*21*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, -/*22*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,22,22,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1}, -/*23*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,23,23,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1}, -/*24*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,25,25,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, -/*25*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, -/*26*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, -/*27*/ {27,27,-1,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, -/*28*/ {28,28,-8,-1,-6,-1,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1}, -/*29*/ {29,29,-1,-1,-1,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1} + and returns either a new state or an action. An action is represented as a + negative number. A JSON text is accepted if at the end of the text the + state is OK and if the mode is MODE_DONE. + + white 1-9 ABCDF etc + space | { } [ ] : , " \ / + - . 
0 | a b c d e f l n r s t u | E |*/ +/*start GO*/ {GO,GO,-6,__,-5,__,__,__,ST,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*ok OK*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*object OB*/ {OB,OB,__,-9,__,__,__,__,ST,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*key KE*/ {KE,KE,__,__,__,__,__,__,ST,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*colon CO*/ {CO,CO,__,__,__,__,-2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*value VA*/ {VA,VA,-6,__,-5,__,__,__,ST,__,__,__,MI,__,ZE,IN,__,__,__,__,__,F1,__,N1,__,__,T1,__,__,__,__}, +/*array AR*/ {AR,AR,-6,__,-5,-7,__,__,ST,__,__,__,MI,__,ZE,IN,__,__,__,__,__,F1,__,N1,__,__,T1,__,__,__,__}, +/*string ST*/ {ST,__,ST,ST,ST,ST,ST,ST,-4,ES,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST}, +/*escape ES*/ {__,__,__,__,__,__,__,__,ST,ST,ST,__,__,__,__,__,__,ST,__,__,__,ST,__,ST,ST,__,ST,U1,__,__,__}, +/*u1 U1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U2,U2,U2,U2,U2,U2,U2,U2,__,__,__,__,__,__,U2,U2,__}, +/*u2 U2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U3,U3,U3,U3,U3,U3,U3,U3,__,__,__,__,__,__,U3,U3,__}, +/*u3 U3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U4,U4,U4,U4,U4,U4,U4,U4,__,__,__,__,__,__,U4,U4,__}, +/*u4 U4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ST,ST,ST,ST,ST,ST,ST,ST,__,__,__,__,__,__,ST,ST,__}, +/*minus MI*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ZE,IN,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*zero ZE*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,FR,__,__,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__}, +/*int IN*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,FR,IN,IN,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__}, +/*frac FR*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__}, +/*e E1*/ {__,__,__,__,__,__,__,__,__,__,__,E2,E2,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*ex 
E2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*exp E3*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*tr T1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T2,__,__,__,__,__,__}, +/*tru T2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T3,__,__,__}, +/*true T3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__}, +/*fa F1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F2,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, +/*fal F2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F3,__,__,__,__,__,__,__,__}, +/*fals F3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F4,__,__,__,__,__}, +/*false F4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__}, +/*nu N1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N2,__,__,__}, +/*nul N2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N3,__,__,__,__,__,__,__,__}, +/*null N3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__}, }; -#define JSON_PARSER_MAX_DEPTH 512 - /* - A stack maintains the states of nested structures. + These modes can be pushed on the stack. */ +enum modes { + MODE_ARRAY, + MODE_DONE, + MODE_KEY, + MODE_OBJECT, +}; -typedef struct json_parser -{ - int the_stack[JSON_PARSER_MAX_DEPTH]; - zval *the_zstack[JSON_PARSER_MAX_DEPTH]; - int the_top; -} json_parser; - +enum error_codes { + ERROR_DEPTH, + ERROR_MISMATCH, + ERROR_CTRL_CHAR, + ERROR_SYNTAX, +}; -/* - These modes can be pushed on the PDA stack. -*/ -#define MODE_DONE 1 -#define MODE_KEY 2 -#define MODE_OBJECT 3 -#define MODE_ARRAY 4 /* Push a mode onto the stack. Return false if there is overflow. 
*/ static int -push(json_parser *json, zval *z, int mode) +push(JSON_parser jp, int mode) { - json->the_top += 1; - if (json->the_top >= JSON_PARSER_MAX_DEPTH) { + jp->top += 1; + if (jp->top >= jp->depth) { + jp->error = ERROR_DEPTH; return false; } - - json->the_stack[json->the_top] = mode; + jp->stack[jp->top] = mode; return true; } @@ -245,17 +220,50 @@ push(json_parser *json, zval *z, int mode) Return false if there is underflow or if the modes mismatch. */ static int -pop(json_parser *json, zval *z, int mode) +pop(JSON_parser jp, int mode) { - if (json->the_top < 0 || json->the_stack[json->the_top] != mode) { + if (jp->top < 0 || jp->stack[jp->top] != mode) { + jp->error = ERROR_MISMATCH; return false; } - json->the_stack[json->the_top] = 0; - json->the_top -= 1; - + jp->top -= 1; return true; } +/* + new_JSON_checker starts the checking process by constructing a JSON_checker + object. It takes a depth parameter that restricts the level of maximum + nesting. + + To continue the process, call JSON_checker_char for each character in the + JSON text, and then call JSON_checker_done to obtain the final result. + These functions are fully reentrant. + + The JSON_checker object will be deleted by JSON_checker_done. + JSON_checker_char will delete the JSON_checker object if it sees an error. +*/ +JSON_parser +new_JSON_parser(int depth) +{ + JSON_parser jp = (JSON_parser)emalloc(sizeof(struct JSON_parser_struct)); + jp->state = GO; + jp->depth = depth; + jp->top = -1; + jp->stack = (int*)ecalloc(depth, sizeof(int)); + push(jp, MODE_DONE); + return jp; +} + +/* + Delete the JSON_parser object. 
+*/ +int +free_JSON_parser(JSON_parser jp) +{ + efree((void*)jp->stack); + efree((void*)jp); + return false; +} static int dehexchar(char c) { @@ -284,12 +292,12 @@ static void json_create_zval(zval **z, smart_str *buf, int type) if (type == IS_LONG) { - double d = zend_strtod(buf->c, NULL); - if (d > LONG_MAX || d < LONG_MIN) { - ZVAL_DOUBLE(*z, d); - } else { - ZVAL_LONG(*z, (long)d); - } + double d = zend_strtod(buf->c, NULL); + if (d > LONG_MAX || d < LONG_MIN) { + ZVAL_DOUBLE(*z, d); + } else { + ZVAL_LONG(*z, (long)d); + } } else if (type == IS_DOUBLE) { @@ -348,11 +356,11 @@ static void utf16_to_utf8(smart_str *buf, unsigned short utf16) } } -static void attach_zval(json_parser *json, int up, int cur, smart_str *key, int assoc TSRMLS_DC) +static void attach_zval(JSON_parser jp, int up, int cur, smart_str *key, int assoc TSRMLS_DC) { - zval *root = json->the_zstack[up]; - zval *child = json->the_zstack[cur]; - int up_mode = json->the_stack[up]; + zval *root = jp->the_zstack[up]; + zval *child = jp->the_zstack[cur]; + int up_mode = jp->stack[up]; if (up_mode == MODE_ARRAY) { @@ -363,9 +371,7 @@ static void attach_zval(json_parser *json, int up, int cur, smart_str *key, int if (!assoc) { add_property_zval_ex(root, (key->len ? key->c : "_empty_"), (key->len ? 
(key->len + 1) : sizeof("_empty_")), child TSRMLS_CC); -#if PHP_MAJOR_VERSION >= 5 Z_DELREF_P(child); -#endif } else { @@ -376,7 +382,7 @@ static void attach_zval(json_parser *json, int up, int cur, smart_str *key, int } -#define FREE_BUFFERS() do { smart_str_free(&buf); smart_str_free(&key); } while (0); +#define FREE_BUFFERS() smart_str_free(&buf); smart_str_free(&key); #define SWAP_BUFFERS(from, to) do { \ char *t1 = from.c; \ int t2 = from.a; \ @@ -387,9 +393,7 @@ static void attach_zval(json_parser *json, int up, int cur, smart_str *key, int to.len = from.len; \ from.len = 0; \ } while(0); -#define JSON_RESET_TYPE() do { type = -1; } while(0); -#define JSON(x) the_json.x - +#define JSON_RESET_TYPE() type = -1; /* The JSON_parser takes a UTF-16 encoded string and determines if it is a @@ -399,213 +403,241 @@ static void attach_zval(json_parser *json, int up, int cur, smart_str *key, int machine with a stack. */ int -JSON_parser(zval *z, unsigned short p[], int length, int assoc TSRMLS_DC) +parse_JSON(JSON_parser jp, zval *z, unsigned short utf16_json[], int length, int assoc TSRMLS_DC) { - int b; /* the next character */ - int c; /* the next character class */ - int s; /* the next state */ - json_parser the_json; /* the parser state */ - int the_state = 0; + int next_char; /* the next character */ + int next_class; /* the next character class */ + int next_state; /* the next state */ int the_index; smart_str buf = {0}; smart_str key = {0}; - int type = -1; unsigned short utf16 = 0; + int type; - JSON(the_top) = -1; - push(&the_json, z, MODE_DONE); + JSON_RESET_TYPE(); for (the_index = 0; the_index < length; the_index += 1) { - b = p[the_index]; - if ((b & 127) == b) { - c = ascii_class[b]; - if (c <= S_ERR) { - FREE_BUFFERS(); - return false; - } - } else { - c = S_ETC; - } + next_char = utf16_json[the_index]; + if (next_char >= 128) { + next_class = C_ETC; + } else { + next_class = ascii_class[next_char]; + if (next_class <= __) { + jp->error = 
ERROR_CTRL_CHAR; + FREE_BUFFERS(); + return false; + } + } /* Get the next state from the transition table. */ - s = state_transition_table[the_state][c]; - if (s < 0) { + next_state = state_transition_table[jp->state][next_class]; + if (next_state >= 0) { /* - Perform one of the predefined actions. + Change the state and iterate */ - switch (s) { + if (type == IS_STRING) { + if (next_state == ST && jp->state != U4) { + if (jp->state != ES) { + utf16_to_utf8(&buf, next_char); + } else { + switch (next_char) { + case 'b': + smart_str_appendc(&buf, '\b'); + break; + case 't': + smart_str_appendc(&buf, '\t'); + break; + case 'n': + smart_str_appendc(&buf, '\n'); + break; + case 'f': + smart_str_appendc(&buf, '\f'); + break; + case 'r': + smart_str_appendc(&buf, '\r'); + break; + default: + utf16_to_utf8(&buf, next_char); + break; + } + } + } else if (next_state == U2) { + utf16 = dehexchar(next_char) << 12; + } else if (next_state == U3) { + utf16 += dehexchar(next_char) << 8; + } else if (next_state == U4) { + utf16 += dehexchar(next_char) << 4; + } else if (next_state == ST && jp->state == U4) { + utf16 += dehexchar(next_char); + utf16_to_utf8(&buf, utf16); + } + } else if (type < IS_LONG && (next_class == C_DIGIT || next_class == C_ZERO)) { + type = IS_LONG; + smart_str_appendc(&buf, next_char); + } else if (type == IS_LONG && next_state == E1) { + type = IS_DOUBLE; + smart_str_appendc(&buf, next_char); + } else if (type < IS_DOUBLE && next_class == C_POINT) { + type = IS_DOUBLE; + smart_str_appendc(&buf, next_char); + } else if (type < IS_STRING && next_class == C_QUOTE) { + type = IS_STRING; + } else if (type < IS_BOOL && ((jp->state == T3 && next_state == OK) || (jp->state == F4 && next_state == OK))) { + type = IS_BOOL; + } else if (type < IS_NULL && jp->state == N3 && next_state == OK) { + type = IS_NULL; + } else if (type != IS_STRING && next_class > C_WHITE) { + utf16_to_utf8(&buf, next_char); + } + jp->state = next_state; + } else { /* - empty } + Perform 
one of the predefined actions. */ + switch (next_state) { +/* empty } */ case -9: - if (!pop(&the_json, z, MODE_KEY)) { + if (!pop(jp, MODE_KEY)) { FREE_BUFFERS(); return false; } - the_state = 9; + jp->state = OK; break; -/* - { -*/ +/* } */ case -8: - if (!push(&the_json, z, MODE_KEY)) { - FREE_BUFFERS(); - return false; - } - - the_state = 1; - if (JSON(the_top) > 0) + if (type != -1 && + (jp->stack[jp->top] == MODE_OBJECT || + jp->stack[jp->top] == MODE_ARRAY)) { - zval *obj; - - if (JSON(the_top) == 1) - { - obj = z; - } - else - { - ALLOC_INIT_ZVAL(obj); - } - - if (!assoc) - { - object_init(obj); - } - else - { - array_init(obj); - } + zval *mval; + smart_str_0(&buf); - JSON(the_zstack)[JSON(the_top)] = obj; + json_create_zval(&mval, &buf, type); - if (JSON(the_top) > 1) - { - attach_zval(&the_json, JSON(the_top-1), JSON(the_top), &key, assoc TSRMLS_CC); + if (!assoc) { + add_property_zval_ex(jp->the_zstack[jp->top], (key.len ? key.c : "_empty_"), (key.len ? (key.len + 1) : sizeof("_empty_")), mval TSRMLS_CC); + Z_DELREF_P(mval); + } else { + add_assoc_zval_ex(jp->the_zstack[jp->top], (key.len ? key.c : ""), (key.len ? (key.len + 1) : sizeof("")), mval); } - + key.len = 0; + buf.len = 0; JSON_RESET_TYPE(); } + + if (!pop(jp, MODE_OBJECT)) { + FREE_BUFFERS(); + return false; + } + jp->state = OK; break; -/* - } -*/ +/* ] */ case -7: + { if (type != -1 && - (JSON(the_stack)[JSON(the_top)] == MODE_OBJECT || - JSON(the_stack)[JSON(the_top)] == MODE_ARRAY)) + (jp->stack[jp->top] == MODE_OBJECT || + jp->stack[jp->top] == MODE_ARRAY)) { zval *mval; smart_str_0(&buf); json_create_zval(&mval, &buf, type); - - if (!assoc) - { - add_property_zval_ex(JSON(the_zstack)[JSON(the_top)], (key.len ? key.c : "_empty_"), (key.len ? (key.len + 1) : sizeof("_empty_")), mval TSRMLS_CC); -#if PHP_MAJOR_VERSION >= 5 - Z_DELREF_P(mval); -#endif - } - else - { - add_assoc_zval_ex(JSON(the_zstack)[JSON(the_top)], (key.len ? key.c : ""), (key.len ? 
(key.len + 1) : sizeof("")), mval); - } - key.len = 0; + add_next_index_zval(jp->the_zstack[jp->top], mval); buf.len = 0; JSON_RESET_TYPE(); } - - if (!pop(&the_json, z, MODE_OBJECT)) { + if (!pop(jp, MODE_ARRAY)) { FREE_BUFFERS(); return false; } - the_state = 9; - break; -/* - [ -*/ + jp->state = OK; + } + break; +/* { */ case -6: - if (!push(&the_json, z, MODE_ARRAY)) { + if (!push(jp, MODE_KEY)) { FREE_BUFFERS(); return false; } - the_state = 2; - if (JSON(the_top) > 0) - { - zval *arr; + jp->state = OB; + if (jp->top > 0) { + zval *obj; - if (JSON(the_top) == 1) - { - arr = z; + if (jp->top == 1) { + obj = z; + } else { + ALLOC_INIT_ZVAL(obj); } - else - { - ALLOC_INIT_ZVAL(arr); + + if (!assoc) { + object_init(obj); + } else { + array_init(obj); } - array_init(arr); - JSON(the_zstack)[JSON(the_top)] = arr; + jp->the_zstack[jp->top] = obj; - if (JSON(the_top) > 1) - { - attach_zval(&the_json, JSON(the_top-1), JSON(the_top), &key, assoc TSRMLS_CC); + if (jp->top > 1) { + attach_zval(jp, jp->top - 1, jp->top, &key, assoc TSRMLS_CC); } JSON_RESET_TYPE(); } break; -/* - ] -*/ +/* [ */ case -5: - { - if (type != -1 && - (JSON(the_stack)[JSON(the_top)] == MODE_OBJECT || - JSON(the_stack)[JSON(the_top)] == MODE_ARRAY)) - { - zval *mval; - smart_str_0(&buf); + if (!push(jp, MODE_ARRAY)) { + FREE_BUFFERS(); + return false; + } + jp->state = AR; + + if (jp->top > 0) { + zval *arr; + + if (jp->top == 1) { + arr = z; + } else { + ALLOC_INIT_ZVAL(arr); + } + + array_init(arr); + jp->the_zstack[jp->top] = arr; + + if (jp->top > 1) { + attach_zval(jp, jp->top - 1, jp->top, &key, assoc TSRMLS_CC); + } - json_create_zval(&mval, &buf, type); - add_next_index_zval(JSON(the_zstack)[JSON(the_top)], mval); - buf.len = 0; JSON_RESET_TYPE(); } - if (!pop(&the_json, z, MODE_ARRAY)) { - FREE_BUFFERS(); - return false; - } - the_state = 9; - } break; -/* - " -*/ + +/* " */ case -4: - switch (JSON(the_stack)[JSON(the_top)]) { + switch (jp->stack[jp->top]) { case MODE_KEY: - the_state = 
27; + jp->state = CO; smart_str_0(&buf); SWAP_BUFFERS(buf, key); JSON_RESET_TYPE(); break; case MODE_ARRAY: case MODE_OBJECT: - the_state = 9; + jp->state = OK; break; case MODE_DONE: if (type == IS_STRING) { smart_str_0(&buf); ZVAL_STRINGL(z, buf.c, buf.len, 1); - the_state = 9; + jp->state = OK; break; } /* fall through if not IS_STRING */ @@ -614,48 +646,39 @@ JSON_parser(zval *z, unsigned short p[], int length, int assoc TSRMLS_DC) return false; } break; -/* - , -*/ +/* , */ case -3: { zval *mval; if (type != -1 && - (JSON(the_stack)[JSON(the_top)] == MODE_OBJECT || - JSON(the_stack[JSON(the_top)]) == MODE_ARRAY)) + (jp->stack[jp->top] == MODE_OBJECT || + jp->stack[jp->top] == MODE_ARRAY)) { smart_str_0(&buf); json_create_zval(&mval, &buf, type); } - switch (JSON(the_stack)[JSON(the_top)]) { + switch (jp->stack[jp->top]) { case MODE_OBJECT: - if (pop(&the_json, z, MODE_OBJECT) && push(&the_json, z, MODE_KEY)) { - if (type != -1) - { - if (!assoc) - { - add_property_zval_ex(JSON(the_zstack)[JSON(the_top)], (key.len ? key.c : "_empty_"), (key.len ? (key.len + 1) : sizeof("_empty_")), mval TSRMLS_CC); -#if PHP_MAJOR_VERSION >= 5 + if (pop(jp, MODE_OBJECT) && push(jp, MODE_KEY)) { + if (type != -1) { + if (!assoc) { + add_property_zval_ex(jp->the_zstack[jp->top], (key.len ? key.c : "_empty_"), (key.len ? (key.len + 1) : sizeof("_empty_")), mval TSRMLS_CC); Z_DELREF_P(mval); -#endif - } - else - { - add_assoc_zval_ex(JSON(the_zstack)[JSON(the_top)], (key.len ? key.c : ""), (key.len ? (key.len + 1) : sizeof("")), mval); + } else { + add_assoc_zval_ex(jp->the_zstack[jp->top], (key.len ? key.c : ""), (key.len ? 
(key.len + 1) : sizeof("")), mval); } key.len = 0; } - the_state = 29; + jp->state = KE; } break; case MODE_ARRAY: - if (type != -1) - { - add_next_index_zval(JSON(the_zstack)[JSON(the_top)], mval); + if (type != -1) { + add_next_index_zval(jp->the_zstack[jp->top], mval); } - the_state = 28; + jp->state = VA; break; default: FREE_BUFFERS(); @@ -665,117 +688,28 @@ JSON_parser(zval *z, unsigned short p[], int length, int assoc TSRMLS_DC) JSON_RESET_TYPE(); } break; -/* - : -*/ +/* : */ case -2: - if (pop(&the_json, z, MODE_KEY) && push(&the_json, z, MODE_OBJECT)) { - the_state = 28; + if (pop(jp, MODE_KEY) && push(jp, MODE_OBJECT)) { + jp->state = VA; break; } /* syntax error */ - case -1: + default: { + jp->error = ERROR_SYNTAX; FREE_BUFFERS(); return false; } } - } else { -/* - Change the state and iterate. -*/ - if (type == IS_STRING) - { - if (s == 3 && the_state != 8) - { - if (the_state != 4) - { - utf16_to_utf8(&buf, b); - } - else - { - switch (b) - { - case 'b': - smart_str_appendc(&buf, '\b'); - break; - case 't': - smart_str_appendc(&buf, '\t'); - break; - case 'n': - smart_str_appendc(&buf, '\n'); - break; - case 'f': - smart_str_appendc(&buf, '\f'); - break; - case 'r': - smart_str_appendc(&buf, '\r'); - break; - default: - utf16_to_utf8(&buf, b); - break; - } - } - } - else if (s == 6) - { - utf16 = dehexchar(b) << 12; - } - else if (s == 7) - { - utf16 += dehexchar(b) << 8; - } - else if (s == 8) - { - utf16 += dehexchar(b) << 4; - } - else if (s == 3 && the_state == 8) - { - utf16 += dehexchar(b); - utf16_to_utf8(&buf, utf16); - } - } - else if (type < IS_LONG && (c == S_DIG || c == S_ZER)) - { - type = IS_LONG; - smart_str_appendc(&buf, b); - } - else if (type == IS_LONG && s == 24) - { - type = IS_DOUBLE; - smart_str_appendc(&buf, b); - } - else if (type < IS_DOUBLE && c == S_DOT) - { - type = IS_DOUBLE; - smart_str_appendc(&buf, b); - } - else if (type < IS_STRING && c == S_QUO) - { - type = IS_STRING; - } - else if (type < IS_BOOL && ((the_state 
== 12 && s == 9) || (the_state == 16 && s == 9))) - { - type = IS_BOOL; - } - else if (type < IS_NULL && the_state == 19 && s == 9) - { - type = IS_NULL; - } - else if (type != IS_STRING && c > S_WSP) - { - utf16_to_utf8(&buf, b); - } - - the_state = s; } } FREE_BUFFERS(); - return the_state == 9 && pop(&the_json, z, MODE_DONE); + return jp->state == OK && pop(jp, MODE_DONE); } diff --git a/ext/json/JSON_parser.h b/ext/json/JSON_parser.h index 4790f728e3..9aab62a912 100644 --- a/ext/json/JSON_parser.h +++ b/ext/json/JSON_parser.h @@ -1,6 +1,24 @@ -/* JSON_checker.h */ +/* JSON_parser.h */ + +#ifndef JSON_PARSER_H +#define JSON_PARSER_H #include "php.h" #include "ext/standard/php_smart_str.h" -extern int JSON_parser(zval *z, unsigned short p[], int length, int assoc TSRMLS_DC); +#define JSON_PARSER_MAX_DEPTH 512 + +typedef struct JSON_parser_struct { + int state; + int depth; + int top; + int error; + int* stack; + zval *the_zstack[JSON_PARSER_MAX_DEPTH]; + +} * JSON_parser; + +extern JSON_parser new_JSON_parser(int depth); +extern int parse_JSON(JSON_parser jp, zval *z, unsigned short utf16_json[], int length, int assoc TSRMLS_DC); +extern int free_JSON_parser(JSON_parser jp); +#endif diff --git a/ext/json/json.c b/ext/json/json.c index 24afcdf560..e09027576b 100644 --- a/ext/json/json.c +++ b/ext/json/json.c @@ -44,6 +44,7 @@ static const char digits[] = "0123456789abcdef"; /* {{{ arginfo */ ZEND_BEGIN_ARG_INFO_EX(arginfo_json_encode, 0, 0, 1) ZEND_ARG_INFO(0, value) + ZEND_ARG_INFO(0, options) ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_INFO_EX(arginfo_json_decode, 0, 0, 1) @@ -443,7 +444,7 @@ static void json_encode_r(smart_str *buf, zval *val, int options TSRMLS_DC) /* { } /* }}} */ -/* {{{ proto string json_encode(mixed data) +/* {{{ proto string json_encode(mixed data [, int options]) Returns the JSON representation of a value */ static PHP_FUNCTION(json_encode) { @@ -463,17 +464,19 @@ static PHP_FUNCTION(json_encode) } /* }}} */ -/* {{{ proto mixed json_decode(string 
json [, bool assoc]) +/* {{{ proto mixed json_decode(string json [, bool assoc [, long depth]]) Decodes the JSON representation into a PHP value */ static PHP_FUNCTION(json_decode) { char *str; int str_len, utf16_len; zend_bool assoc = 0; /* return JS objects as PHP objects by default */ + long depth = JSON_PARSER_MAX_DEPTH; zval *z; unsigned short *utf16; + JSON_parser jp; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|b", &str, &str_len, &assoc) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|bl", &str, &str_len, &assoc, &depth) == FAILURE) { return; } @@ -491,11 +494,15 @@ static PHP_FUNCTION(json_decode) RETURN_NULL(); } + /* can be removed once we remove the max depth limit */ + if (depth <= 0 || depth > JSON_PARSER_MAX_DEPTH) { + depth = JSON_PARSER_MAX_DEPTH; + } + ALLOC_INIT_ZVAL(z); - if (JSON_parser(z, utf16, utf16_len, assoc TSRMLS_CC)) { + jp = new_JSON_parser(depth); + if (parse_JSON(jp, z, utf16, utf16_len, assoc TSRMLS_CC)) { *return_value = *z; - FREE_ZVAL(z); - efree(utf16); } else { @@ -503,28 +510,30 @@ static PHP_FUNCTION(json_decode) int type; long p; - zval_dtor(z); - FREE_ZVAL(z); - efree(utf16); - + RETVAL_NULL(); if (str_len == 4) { if (!strcasecmp(str, "null")) { - RETURN_NULL(); + RETVAL_NULL(); } else if (!strcasecmp(str, "true")) { - RETURN_BOOL(1); + RETVAL_BOOL(1); } } else if (str_len == 5 && !strcasecmp(str, "false")) { - RETURN_BOOL(0); + RETVAL_BOOL(0); } + if ((type = is_numeric_string(str, str_len, &p, &d, 0)) != 0) { if (type == IS_LONG) { - RETURN_LONG(p); + RETVAL_LONG(p); } else if (type == IS_DOUBLE) { - RETURN_DOUBLE(d); + RETVAL_DOUBLE(d); } } - RETURN_NULL(); + + zval_dtor(z); } + FREE_ZVAL(z); + efree(utf16); + free_JSON_parser(jp); } /* }}} */ |