summaryrefslogtreecommitdiff
path: root/ext/json
diff options
context:
space:
mode:
authorScott MacVicar <scottmac@php.net>2008-12-17 14:40:06 +0000
committerScott MacVicar <scottmac@php.net>2008-12-17 14:40:06 +0000
commit8ef7fe1d996c702d37d21cbea0e2a28b1013ed4a (patch)
tree477a5a1126a1ebf08b25cdd11890f8cdec3700f1 /ext/json
parent77a63540a5f7f54b1741152f316fd59caa4d4989 (diff)
downloadphp-git-8ef7fe1d996c702d37d21cbea0e2a28b1013ed4a.tar.gz
Update the JSON parser with that on json.org, biggest change here is code readability. Less magic numbers in the state table.
Add missing reflection information to json_encode() Fixes bug #45791 with 0e0 not being supported as a value Error values are stored when encountered during parsing
Diffstat (limited to 'ext/json')
-rw-r--r--ext/json/JSON_parser.c820
-rw-r--r--ext/json/JSON_parser.h22
-rw-r--r--ext/json/json.c41
3 files changed, 422 insertions, 461 deletions
diff --git a/ext/json/JSON_parser.c b/ext/json/JSON_parser.c
index a90ade02de..5174fb756e 100644
--- a/ext/json/JSON_parser.c
+++ b/ext/json/JSON_parser.c
@@ -26,216 +26,191 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
-
-#include "JSON_parser.h"
#include <stdio.h>
+#include "JSON_parser.h"
#define true 1
#define false 0
+#define __ -1 /* the universal error code */
/*
- Characters are mapped into these 32 symbol classes. This allows for
- significant reductions in the size of the state transition table.
+ Characters are mapped into these 31 character classes. This allows for
+ a significant reduction in the size of the state transition table.
*/
-/* error */
-#define S_ERR -1
-
-/* space */
-#define S_SPA 0
-
-/* other whitespace */
-#define S_WSP 1
-
-/* { */
-#define S_LBE 2
-
-/* } */
-#define S_RBE 3
-
-/* [ */
-#define S_LBT 4
-
-/* ] */
-#define S_RBT 5
-
-/* : */
-#define S_COL 6
-
-/* , */
-#define S_COM 7
-
-/* " */
-#define S_QUO 8
-
-/* \ */
-#define S_BAC 9
-
-/* / */
-#define S_SLA 10
-
-/* + */
-#define S_PLU 11
-
-/* - */
-#define S_MIN 12
-
-/* . */
-#define S_DOT 13
-
-/* 0 */
-#define S_ZER 14
-
-/* 123456789 */
-#define S_DIG 15
-
-/* a */
-#define S__A_ 16
-
-/* b */
-#define S__B_ 17
-
-/* c */
-#define S__C_ 18
-
-/* d */
-#define S__D_ 19
-
-/* e */
-#define S__E_ 20
-
-/* f */
-#define S__F_ 21
-
-/* l */
-#define S__L_ 22
-
-/* n */
-#define S__N_ 23
-
-/* r */
-#define S__R_ 24
-
-/* s */
-#define S__S_ 25
-
-/* t */
-#define S__T_ 26
-
-/* u */
-#define S__U_ 27
-
-/* ABCDF */
-#define S_A_F 28
-
-/* E */
-#define S_E 29
+enum classes {
+ C_SPACE, /* space */
+ C_WHITE, /* other whitespace */
+ C_LCURB, /* { */
+ C_RCURB, /* } */
+ C_LSQRB, /* [ */
+ C_RSQRB, /* ] */
+ C_COLON, /* : */
+ C_COMMA, /* , */
+ C_QUOTE, /* " */
+ C_BACKS, /* \ */
+ C_SLASH, /* / */
+ C_PLUS, /* + */
+ C_MINUS, /* - */
+ C_POINT, /* . */
+ C_ZERO , /* 0 */
+ C_DIGIT, /* 123456789 */
+ C_LOW_A, /* a */
+ C_LOW_B, /* b */
+ C_LOW_C, /* c */
+ C_LOW_D, /* d */
+ C_LOW_E, /* e */
+ C_LOW_F, /* f */
+ C_LOW_L, /* l */
+ C_LOW_N, /* n */
+ C_LOW_R, /* r */
+ C_LOW_S, /* s */
+ C_LOW_T, /* t */
+ C_LOW_U, /* u */
+ C_ABCDF, /* ABCDF */
+ C_E, /* E */
+ C_ETC, /* everything else */
+ NR_CLASSES
+};
-/* everything else */
-#define S_ETC 30
+static const int ascii_class[128] = {
+/*
+ This array maps the 128 ASCII characters into character classes.
+ The remaining Unicode characters should be mapped to C_ETC.
+ Non-whitespace control characters are errors.
+*/
+ __, __, __, __, __, __, __, __,
+ __, C_WHITE, C_WHITE, __, __, C_WHITE, __, __,
+ __, __, __, __, __, __, __, __,
+ __, __, __, __, __, __, __, __,
+
+ C_SPACE, C_ETC, C_QUOTE, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
+ C_ETC, C_ETC, C_ETC, C_PLUS, C_COMMA, C_MINUS, C_POINT, C_SLASH,
+ C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
+ C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
+
+ C_ETC, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E, C_ABCDF, C_ETC,
+ C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
+ C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
+ C_ETC, C_ETC, C_ETC, C_LSQRB, C_BACKS, C_RSQRB, C_ETC, C_ETC,
+
+ C_ETC, C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC,
+ C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_L, C_ETC, C_LOW_N, C_ETC,
+ C_ETC, C_ETC, C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC, C_ETC,
+ C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB, C_ETC, C_ETC
+};
/*
- This table maps the 128 ASCII characters into the 32 character classes.
- The remaining Unicode characters should be mapped to S_ETC.
+ The state codes.
*/
-static const int ascii_class[128] = {
- S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR,
- S_ERR, S_WSP, S_WSP, S_ERR, S_ERR, S_WSP, S_ERR, S_ERR,
- S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR,
- S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR,
-
- S_SPA, S_ETC, S_QUO, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC,
- S_ETC, S_ETC, S_ETC, S_PLU, S_COM, S_MIN, S_DOT, S_SLA,
- S_ZER, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG,
- S_DIG, S_DIG, S_COL, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC,
-
- S_ETC, S_A_F, S_A_F, S_A_F, S_A_F, S_E , S_A_F, S_ETC,
- S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC,
- S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC,
- S_ETC, S_ETC, S_ETC, S_LBT, S_BAC, S_RBT, S_ETC, S_ETC,
-
- S_ETC, S__A_, S__B_, S__C_, S__D_, S__E_, S__F_, S_ETC,
- S_ETC, S_ETC, S_ETC, S_ETC, S__L_, S_ETC, S__N_, S_ETC,
- S_ETC, S_ETC, S__R_, S__S_, S__T_, S__U_, S_ETC, S_ETC,
- S_ETC, S_ETC, S_ETC, S_LBE, S_ETC, S_RBE, S_ETC, S_ETC
+enum states {
+ GO, /* start */
+ OK, /* ok */
+ OB, /* object */
+ KE, /* key */
+ CO, /* colon */
+ VA, /* value */
+ AR, /* array */
+ ST, /* string */
+ ES, /* escape */
+ U1, /* u1 */
+ U2, /* u2 */
+ U3, /* u3 */
+ U4, /* u4 */
+ MI, /* minus */
+ ZE, /* zero */
+ IN, /* integer */
+ FR, /* fraction */
+ E1, /* e */
+ E2, /* ex */
+ E3, /* exp */
+ T1, /* tr */
+ T2, /* tru */
+ T3, /* true */
+ F1, /* fa */
+ F2, /* fal */
+ F3, /* fals */
+ F4, /* false */
+ N1, /* nu */
+ N2, /* nul */
+ N3, /* null */
+ NR_STATES
};
+static const int state_transition_table[NR_STATES][NR_CLASSES] = {
/*
The state transition table takes the current state and the current symbol,
- and returns either a new state or an action. A new state is a number between
- 0 and 29. An action is a negative number between -1 and -9. A JSON text is
- accepted if the end of the text is in state 9 and mode is MODE_DONE.
-*/
-static const int state_transition_table[30][31] = {
-/* 0*/ { 0, 0,-8,-1,-6,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
-/* 1*/ { 1, 1,-1,-9,-1,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
-/* 2*/ { 2, 2,-8,-1,-6,-5,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1},
-/* 3*/ { 3,-1, 3, 3, 3, 3, 3, 3,-4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3},
-/* 4*/ {-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3,-1,-1,-1, 3,-1, 3, 3,-1, 3, 5,-1,-1,-1},
-/* 5*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 6, 6, 6, 6, 6, 6, 6, 6,-1,-1,-1,-1,-1,-1, 6, 6,-1},
-/* 6*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 7, 7, 7, 7, 7, 7, 7, 7,-1,-1,-1,-1,-1,-1, 7, 7,-1},
-/* 7*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 8, 8, 8, 8, 8, 8, 8, 8,-1,-1,-1,-1,-1,-1, 8, 8,-1},
-/* 8*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3, 3, 3, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3, 3,-1},
-/* 9*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
-/*10*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,11,-1,-1,-1,-1,-1,-1},
-/*11*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,12,-1,-1,-1},
-/*12*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
-/*13*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,14,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
-/*14*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,15,-1,-1,-1,-1,-1,-1,-1,-1},
-/*15*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,16,-1,-1,-1,-1,-1},
-/*16*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
-/*17*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,18,-1,-1,-1},
-/*18*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,19,-1,-1,-1,-1,-1,-1,-1,-1},
-/*19*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1},
-/*20*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,21,22,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
-/*21*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
-/*22*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,22,22,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1},
-/*23*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,23,23,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1},
-/*24*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,25,25,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
-/*25*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
-/*26*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
-/*27*/ {27,27,-1,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
-/*28*/ {28,28,-8,-1,-6,-1,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1},
-/*29*/ {29,29,-1,-1,-1,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}
+ and returns either a new state or an action. An action is represented as a
+ negative number. A JSON text is accepted if at the end of the text the
+ state is OK and if the mode is MODE_DONE.
+
+ white 1-9 ABCDF etc
+ space | { } [ ] : , " \ / + - . 0 | a b c d e f l n r s t u | E |*/
+/*start GO*/ {GO,GO,-6,__,-5,__,__,__,ST,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*ok OK*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*object OB*/ {OB,OB,__,-9,__,__,__,__,ST,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*key KE*/ {KE,KE,__,__,__,__,__,__,ST,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*colon CO*/ {CO,CO,__,__,__,__,-2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*value VA*/ {VA,VA,-6,__,-5,__,__,__,ST,__,__,__,MI,__,ZE,IN,__,__,__,__,__,F1,__,N1,__,__,T1,__,__,__,__},
+/*array AR*/ {AR,AR,-6,__,-5,-7,__,__,ST,__,__,__,MI,__,ZE,IN,__,__,__,__,__,F1,__,N1,__,__,T1,__,__,__,__},
+/*string ST*/ {ST,__,ST,ST,ST,ST,ST,ST,-4,ES,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST},
+/*escape ES*/ {__,__,__,__,__,__,__,__,ST,ST,ST,__,__,__,__,__,__,ST,__,__,__,ST,__,ST,ST,__,ST,U1,__,__,__},
+/*u1 U1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U2,U2,U2,U2,U2,U2,U2,U2,__,__,__,__,__,__,U2,U2,__},
+/*u2 U2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U3,U3,U3,U3,U3,U3,U3,U3,__,__,__,__,__,__,U3,U3,__},
+/*u3 U3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U4,U4,U4,U4,U4,U4,U4,U4,__,__,__,__,__,__,U4,U4,__},
+/*u4 U4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ST,ST,ST,ST,ST,ST,ST,ST,__,__,__,__,__,__,ST,ST,__},
+/*minus MI*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ZE,IN,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*zero ZE*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,FR,__,__,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__},
+/*int IN*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,FR,IN,IN,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__},
+/*frac FR*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__},
+/*e E1*/ {__,__,__,__,__,__,__,__,__,__,__,E2,E2,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*ex E2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*exp E3*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*tr T1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T2,__,__,__,__,__,__},
+/*tru T2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T3,__,__,__},
+/*true T3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__},
+/*fa F1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F2,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
+/*fal F2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F3,__,__,__,__,__,__,__,__},
+/*fals F3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F4,__,__,__,__,__},
+/*false F4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__},
+/*nu N1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N2,__,__,__},
+/*nul N2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N3,__,__,__,__,__,__,__,__},
+/*null N3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__},
};
-#define JSON_PARSER_MAX_DEPTH 512
-
/*
- A stack maintains the states of nested structures.
+ These modes can be pushed on the stack.
*/
+enum modes {
+ MODE_ARRAY,
+ MODE_DONE,
+ MODE_KEY,
+ MODE_OBJECT,
+};
-typedef struct json_parser
-{
- int the_stack[JSON_PARSER_MAX_DEPTH];
- zval *the_zstack[JSON_PARSER_MAX_DEPTH];
- int the_top;
-} json_parser;
-
+enum error_codes {
+ ERROR_DEPTH,
+ ERROR_MISMATCH,
+ ERROR_CTRL_CHAR,
+ ERROR_SYNTAX,
+};
-/*
- These modes can be pushed on the PDA stack.
-*/
-#define MODE_DONE 1
-#define MODE_KEY 2
-#define MODE_OBJECT 3
-#define MODE_ARRAY 4
/*
Push a mode onto the stack. Return false if there is overflow.
*/
static int
-push(json_parser *json, zval *z, int mode)
+push(JSON_parser jp, int mode)
{
- json->the_top += 1;
- if (json->the_top >= JSON_PARSER_MAX_DEPTH) {
+ jp->top += 1;
+ if (jp->top >= jp->depth) {
+ jp->error = ERROR_DEPTH;
return false;
}
-
- json->the_stack[json->the_top] = mode;
+ jp->stack[jp->top] = mode;
return true;
}
@@ -245,17 +220,50 @@ push(json_parser *json, zval *z, int mode)
Return false if there is underflow or if the modes mismatch.
*/
static int
-pop(json_parser *json, zval *z, int mode)
+pop(JSON_parser jp, int mode)
{
- if (json->the_top < 0 || json->the_stack[json->the_top] != mode) {
+ if (jp->top < 0 || jp->stack[jp->top] != mode) {
+ jp->error = ERROR_MISMATCH;
return false;
}
- json->the_stack[json->the_top] = 0;
- json->the_top -= 1;
-
+ jp->top -= 1;
return true;
}
+/*
+ new_JSON_checker starts the checking process by constructing a JSON_checker
+ object. It takes a depth parameter that restricts the level of maximum
+ nesting.
+
+ To continue the process, call JSON_checker_char for each character in the
+ JSON text, and then call JSON_checker_done to obtain the final result.
+ These functions are fully reentrant.
+
+ The JSON_checker object will be deleted by JSON_checker_done.
+ JSON_checker_char will delete the JSON_checker object if it sees an error.
+*/
+JSON_parser
+new_JSON_parser(int depth)
+{
+ JSON_parser jp = (JSON_parser)emalloc(sizeof(struct JSON_parser_struct));
+ jp->state = GO;
+ jp->depth = depth;
+ jp->top = -1;
+ jp->stack = (int*)ecalloc(depth, sizeof(int));
+ push(jp, MODE_DONE);
+ return jp;
+}
+
+/*
+ Delete the JSON_parser object.
+*/
+int
+free_JSON_parser(JSON_parser jp)
+{
+ efree((void*)jp->stack);
+ efree((void*)jp);
+ return false;
+}
static int dehexchar(char c)
{
@@ -284,12 +292,12 @@ static void json_create_zval(zval **z, smart_str *buf, int type)
if (type == IS_LONG)
{
- double d = zend_strtod(buf->c, NULL);
- if (d > LONG_MAX || d < LONG_MIN) {
- ZVAL_DOUBLE(*z, d);
- } else {
- ZVAL_LONG(*z, (long)d);
- }
+ double d = zend_strtod(buf->c, NULL);
+ if (d > LONG_MAX || d < LONG_MIN) {
+ ZVAL_DOUBLE(*z, d);
+ } else {
+ ZVAL_LONG(*z, (long)d);
+ }
}
else if (type == IS_DOUBLE)
{
@@ -348,11 +356,11 @@ static void utf16_to_utf8(smart_str *buf, unsigned short utf16)
}
}
-static void attach_zval(json_parser *json, int up, int cur, smart_str *key, int assoc TSRMLS_DC)
+static void attach_zval(JSON_parser jp, int up, int cur, smart_str *key, int assoc TSRMLS_DC)
{
- zval *root = json->the_zstack[up];
- zval *child = json->the_zstack[cur];
- int up_mode = json->the_stack[up];
+ zval *root = jp->the_zstack[up];
+ zval *child = jp->the_zstack[cur];
+ int up_mode = jp->stack[up];
if (up_mode == MODE_ARRAY)
{
@@ -363,9 +371,7 @@ static void attach_zval(json_parser *json, int up, int cur, smart_str *key, int
if (!assoc)
{
add_property_zval_ex(root, (key->len ? key->c : "_empty_"), (key->len ? (key->len + 1) : sizeof("_empty_")), child TSRMLS_CC);
-#if PHP_MAJOR_VERSION >= 5
Z_DELREF_P(child);
-#endif
}
else
{
@@ -376,7 +382,7 @@ static void attach_zval(json_parser *json, int up, int cur, smart_str *key, int
}
-#define FREE_BUFFERS() do { smart_str_free(&buf); smart_str_free(&key); } while (0);
+#define FREE_BUFFERS() smart_str_free(&buf); smart_str_free(&key);
#define SWAP_BUFFERS(from, to) do { \
char *t1 = from.c; \
int t2 = from.a; \
@@ -387,9 +393,7 @@ static void attach_zval(json_parser *json, int up, int cur, smart_str *key, int
to.len = from.len; \
from.len = 0; \
} while(0);
-#define JSON_RESET_TYPE() do { type = -1; } while(0);
-#define JSON(x) the_json.x
-
+#define JSON_RESET_TYPE() type = -1;
/*
The JSON_parser takes a UTF-16 encoded string and determines if it is a
@@ -399,213 +403,241 @@ static void attach_zval(json_parser *json, int up, int cur, smart_str *key, int
machine with a stack.
*/
int
-JSON_parser(zval *z, unsigned short p[], int length, int assoc TSRMLS_DC)
+parse_JSON(JSON_parser jp, zval *z, unsigned short utf16_json[], int length, int assoc TSRMLS_DC)
{
- int b; /* the next character */
- int c; /* the next character class */
- int s; /* the next state */
- json_parser the_json; /* the parser state */
- int the_state = 0;
+ int next_char; /* the next character */
+ int next_class; /* the next character class */
+ int next_state; /* the next state */
int the_index;
smart_str buf = {0};
smart_str key = {0};
- int type = -1;
unsigned short utf16 = 0;
+ int type;
- JSON(the_top) = -1;
- push(&the_json, z, MODE_DONE);
+ JSON_RESET_TYPE();
for (the_index = 0; the_index < length; the_index += 1) {
- b = p[the_index];
- if ((b & 127) == b) {
- c = ascii_class[b];
- if (c <= S_ERR) {
- FREE_BUFFERS();
- return false;
- }
- } else {
- c = S_ETC;
- }
+ next_char = utf16_json[the_index];
+ if (next_char >= 128) {
+ next_class = C_ETC;
+ } else {
+ next_class = ascii_class[next_char];
+ if (next_class <= __) {
+ jp->error = ERROR_CTRL_CHAR;
+ FREE_BUFFERS();
+ return false;
+ }
+ }
/*
Get the next state from the transition table.
*/
- s = state_transition_table[the_state][c];
- if (s < 0) {
+ next_state = state_transition_table[jp->state][next_class];
+ if (next_state >= 0) {
/*
- Perform one of the predefined actions.
+ Change the state and iterate
*/
- switch (s) {
+ if (type == IS_STRING) {
+ if (next_state == ST && jp->state != U4) {
+ if (jp->state != ES) {
+ utf16_to_utf8(&buf, next_char);
+ } else {
+ switch (next_char) {
+ case 'b':
+ smart_str_appendc(&buf, '\b');
+ break;
+ case 't':
+ smart_str_appendc(&buf, '\t');
+ break;
+ case 'n':
+ smart_str_appendc(&buf, '\n');
+ break;
+ case 'f':
+ smart_str_appendc(&buf, '\f');
+ break;
+ case 'r':
+ smart_str_appendc(&buf, '\r');
+ break;
+ default:
+ utf16_to_utf8(&buf, next_char);
+ break;
+ }
+ }
+ } else if (next_state == U2) {
+ utf16 = dehexchar(next_char) << 12;
+ } else if (next_state == U3) {
+ utf16 += dehexchar(next_char) << 8;
+ } else if (next_state == U4) {
+ utf16 += dehexchar(next_char) << 4;
+ } else if (next_state == ST && jp->state == U4) {
+ utf16 += dehexchar(next_char);
+ utf16_to_utf8(&buf, utf16);
+ }
+ } else if (type < IS_LONG && (next_class == C_DIGIT || next_class == C_ZERO)) {
+ type = IS_LONG;
+ smart_str_appendc(&buf, next_char);
+ } else if (type == IS_LONG && next_state == E1) {
+ type = IS_DOUBLE;
+ smart_str_appendc(&buf, next_char);
+ } else if (type < IS_DOUBLE && next_class == C_POINT) {
+ type = IS_DOUBLE;
+ smart_str_appendc(&buf, next_char);
+ } else if (type < IS_STRING && next_class == C_QUOTE) {
+ type = IS_STRING;
+ } else if (type < IS_BOOL && ((jp->state == T3 && next_state == OK) || (jp->state == F4 && next_state == OK))) {
+ type = IS_BOOL;
+ } else if (type < IS_NULL && jp->state == N3 && next_state == OK) {
+ type = IS_NULL;
+ } else if (type != IS_STRING && next_class > C_WHITE) {
+ utf16_to_utf8(&buf, next_char);
+ }
+ jp->state = next_state;
+ } else {
/*
- empty }
+ Perform one of the predefined actions.
*/
+ switch (next_state) {
+/* empty } */
case -9:
- if (!pop(&the_json, z, MODE_KEY)) {
+ if (!pop(jp, MODE_KEY)) {
FREE_BUFFERS();
return false;
}
- the_state = 9;
+ jp->state = OK;
break;
-/*
- {
-*/
+/* } */
case -8:
- if (!push(&the_json, z, MODE_KEY)) {
- FREE_BUFFERS();
- return false;
- }
-
- the_state = 1;
- if (JSON(the_top) > 0)
+ if (type != -1 &&
+ (jp->stack[jp->top] == MODE_OBJECT ||
+ jp->stack[jp->top] == MODE_ARRAY))
{
- zval *obj;
-
- if (JSON(the_top) == 1)
- {
- obj = z;
- }
- else
- {
- ALLOC_INIT_ZVAL(obj);
- }
-
- if (!assoc)
- {
- object_init(obj);
- }
- else
- {
- array_init(obj);
- }
+ zval *mval;
+ smart_str_0(&buf);
- JSON(the_zstack)[JSON(the_top)] = obj;
+ json_create_zval(&mval, &buf, type);
- if (JSON(the_top) > 1)
- {
- attach_zval(&the_json, JSON(the_top-1), JSON(the_top), &key, assoc TSRMLS_CC);
+ if (!assoc) {
+ add_property_zval_ex(jp->the_zstack[jp->top], (key.len ? key.c : "_empty_"), (key.len ? (key.len + 1) : sizeof("_empty_")), mval TSRMLS_CC);
+ Z_DELREF_P(mval);
+ } else {
+ add_assoc_zval_ex(jp->the_zstack[jp->top], (key.len ? key.c : ""), (key.len ? (key.len + 1) : sizeof("")), mval);
}
-
+ key.len = 0;
+ buf.len = 0;
JSON_RESET_TYPE();
}
+
+ if (!pop(jp, MODE_OBJECT)) {
+ FREE_BUFFERS();
+ return false;
+ }
+ jp->state = OK;
break;
-/*
- }
-*/
+/* ] */
case -7:
+ {
if (type != -1 &&
- (JSON(the_stack)[JSON(the_top)] == MODE_OBJECT ||
- JSON(the_stack)[JSON(the_top)] == MODE_ARRAY))
+ (jp->stack[jp->top] == MODE_OBJECT ||
+ jp->stack[jp->top] == MODE_ARRAY))
{
zval *mval;
smart_str_0(&buf);
json_create_zval(&mval, &buf, type);
-
- if (!assoc)
- {
- add_property_zval_ex(JSON(the_zstack)[JSON(the_top)], (key.len ? key.c : "_empty_"), (key.len ? (key.len + 1) : sizeof("_empty_")), mval TSRMLS_CC);
-#if PHP_MAJOR_VERSION >= 5
- Z_DELREF_P(mval);
-#endif
- }
- else
- {
- add_assoc_zval_ex(JSON(the_zstack)[JSON(the_top)], (key.len ? key.c : ""), (key.len ? (key.len + 1) : sizeof("")), mval);
- }
- key.len = 0;
+ add_next_index_zval(jp->the_zstack[jp->top], mval);
buf.len = 0;
JSON_RESET_TYPE();
}
-
- if (!pop(&the_json, z, MODE_OBJECT)) {
+ if (!pop(jp, MODE_ARRAY)) {
FREE_BUFFERS();
return false;
}
- the_state = 9;
- break;
-/*
- [
-*/
+ jp->state = OK;
+ }
+ break;
+/* { */
case -6:
- if (!push(&the_json, z, MODE_ARRAY)) {
+ if (!push(jp, MODE_KEY)) {
FREE_BUFFERS();
return false;
}
- the_state = 2;
- if (JSON(the_top) > 0)
- {
- zval *arr;
+ jp->state = OB;
+ if (jp->top > 0) {
+ zval *obj;
- if (JSON(the_top) == 1)
- {
- arr = z;
+ if (jp->top == 1) {
+ obj = z;
+ } else {
+ ALLOC_INIT_ZVAL(obj);
}
- else
- {
- ALLOC_INIT_ZVAL(arr);
+
+ if (!assoc) {
+ object_init(obj);
+ } else {
+ array_init(obj);
}
- array_init(arr);
- JSON(the_zstack)[JSON(the_top)] = arr;
+ jp->the_zstack[jp->top] = obj;
- if (JSON(the_top) > 1)
- {
- attach_zval(&the_json, JSON(the_top-1), JSON(the_top), &key, assoc TSRMLS_CC);
+ if (jp->top > 1) {
+ attach_zval(jp, jp->top - 1, jp->top, &key, assoc TSRMLS_CC);
}
JSON_RESET_TYPE();
}
break;
-/*
- ]
-*/
+/* [ */
case -5:
- {
- if (type != -1 &&
- (JSON(the_stack)[JSON(the_top)] == MODE_OBJECT ||
- JSON(the_stack)[JSON(the_top)] == MODE_ARRAY))
- {
- zval *mval;
- smart_str_0(&buf);
+ if (!push(jp, MODE_ARRAY)) {
+ FREE_BUFFERS();
+ return false;
+ }
+ jp->state = AR;
+
+ if (jp->top > 0) {
+ zval *arr;
+
+ if (jp->top == 1) {
+ arr = z;
+ } else {
+ ALLOC_INIT_ZVAL(arr);
+ }
+
+ array_init(arr);
+ jp->the_zstack[jp->top] = arr;
+
+ if (jp->top > 1) {
+ attach_zval(jp, jp->top - 1, jp->top, &key, assoc TSRMLS_CC);
+ }
- json_create_zval(&mval, &buf, type);
- add_next_index_zval(JSON(the_zstack)[JSON(the_top)], mval);
- buf.len = 0;
JSON_RESET_TYPE();
}
- if (!pop(&the_json, z, MODE_ARRAY)) {
- FREE_BUFFERS();
- return false;
- }
- the_state = 9;
- }
break;
-/*
- "
-*/
+
+/* " */
case -4:
- switch (JSON(the_stack)[JSON(the_top)]) {
+ switch (jp->stack[jp->top]) {
case MODE_KEY:
- the_state = 27;
+ jp->state = CO;
smart_str_0(&buf);
SWAP_BUFFERS(buf, key);
JSON_RESET_TYPE();
break;
case MODE_ARRAY:
case MODE_OBJECT:
- the_state = 9;
+ jp->state = OK;
break;
case MODE_DONE:
if (type == IS_STRING) {
smart_str_0(&buf);
ZVAL_STRINGL(z, buf.c, buf.len, 1);
- the_state = 9;
+ jp->state = OK;
break;
}
/* fall through if not IS_STRING */
@@ -614,48 +646,39 @@ JSON_parser(zval *z, unsigned short p[], int length, int assoc TSRMLS_DC)
return false;
}
break;
-/*
- ,
-*/
+/* , */
case -3:
{
zval *mval;
if (type != -1 &&
- (JSON(the_stack)[JSON(the_top)] == MODE_OBJECT ||
- JSON(the_stack[JSON(the_top)]) == MODE_ARRAY))
+ (jp->stack[jp->top] == MODE_OBJECT ||
+ jp->stack[jp->top] == MODE_ARRAY))
{
smart_str_0(&buf);
json_create_zval(&mval, &buf, type);
}
- switch (JSON(the_stack)[JSON(the_top)]) {
+ switch (jp->stack[jp->top]) {
case MODE_OBJECT:
- if (pop(&the_json, z, MODE_OBJECT) && push(&the_json, z, MODE_KEY)) {
- if (type != -1)
- {
- if (!assoc)
- {
- add_property_zval_ex(JSON(the_zstack)[JSON(the_top)], (key.len ? key.c : "_empty_"), (key.len ? (key.len + 1) : sizeof("_empty_")), mval TSRMLS_CC);
-#if PHP_MAJOR_VERSION >= 5
+ if (pop(jp, MODE_OBJECT) && push(jp, MODE_KEY)) {
+ if (type != -1) {
+ if (!assoc) {
+ add_property_zval_ex(jp->the_zstack[jp->top], (key.len ? key.c : "_empty_"), (key.len ? (key.len + 1) : sizeof("_empty_")), mval TSRMLS_CC);
Z_DELREF_P(mval);
-#endif
- }
- else
- {
- add_assoc_zval_ex(JSON(the_zstack)[JSON(the_top)], (key.len ? key.c : ""), (key.len ? (key.len + 1) : sizeof("")), mval);
+ } else {
+ add_assoc_zval_ex(jp->the_zstack[jp->top], (key.len ? key.c : ""), (key.len ? (key.len + 1) : sizeof("")), mval);
}
key.len = 0;
}
- the_state = 29;
+ jp->state = KE;
}
break;
case MODE_ARRAY:
- if (type != -1)
- {
- add_next_index_zval(JSON(the_zstack)[JSON(the_top)], mval);
+ if (type != -1) {
+ add_next_index_zval(jp->the_zstack[jp->top], mval);
}
- the_state = 28;
+ jp->state = VA;
break;
default:
FREE_BUFFERS();
@@ -665,117 +688,28 @@ JSON_parser(zval *z, unsigned short p[], int length, int assoc TSRMLS_DC)
JSON_RESET_TYPE();
}
break;
-/*
- :
-*/
+/* : */
case -2:
- if (pop(&the_json, z, MODE_KEY) && push(&the_json, z, MODE_OBJECT)) {
- the_state = 28;
+ if (pop(jp, MODE_KEY) && push(jp, MODE_OBJECT)) {
+ jp->state = VA;
break;
}
/*
syntax error
*/
- case -1:
+ default:
{
+ jp->error = ERROR_SYNTAX;
FREE_BUFFERS();
return false;
}
}
- } else {
-/*
- Change the state and iterate.
-*/
- if (type == IS_STRING)
- {
- if (s == 3 && the_state != 8)
- {
- if (the_state != 4)
- {
- utf16_to_utf8(&buf, b);
- }
- else
- {
- switch (b)
- {
- case 'b':
- smart_str_appendc(&buf, '\b');
- break;
- case 't':
- smart_str_appendc(&buf, '\t');
- break;
- case 'n':
- smart_str_appendc(&buf, '\n');
- break;
- case 'f':
- smart_str_appendc(&buf, '\f');
- break;
- case 'r':
- smart_str_appendc(&buf, '\r');
- break;
- default:
- utf16_to_utf8(&buf, b);
- break;
- }
- }
- }
- else if (s == 6)
- {
- utf16 = dehexchar(b) << 12;
- }
- else if (s == 7)
- {
- utf16 += dehexchar(b) << 8;
- }
- else if (s == 8)
- {
- utf16 += dehexchar(b) << 4;
- }
- else if (s == 3 && the_state == 8)
- {
- utf16 += dehexchar(b);
- utf16_to_utf8(&buf, utf16);
- }
- }
- else if (type < IS_LONG && (c == S_DIG || c == S_ZER))
- {
- type = IS_LONG;
- smart_str_appendc(&buf, b);
- }
- else if (type == IS_LONG && s == 24)
- {
- type = IS_DOUBLE;
- smart_str_appendc(&buf, b);
- }
- else if (type < IS_DOUBLE && c == S_DOT)
- {
- type = IS_DOUBLE;
- smart_str_appendc(&buf, b);
- }
- else if (type < IS_STRING && c == S_QUO)
- {
- type = IS_STRING;
- }
- else if (type < IS_BOOL && ((the_state == 12 && s == 9) || (the_state == 16 && s == 9)))
- {
- type = IS_BOOL;
- }
- else if (type < IS_NULL && the_state == 19 && s == 9)
- {
- type = IS_NULL;
- }
- else if (type != IS_STRING && c > S_WSP)
- {
- utf16_to_utf8(&buf, b);
- }
-
- the_state = s;
}
}
FREE_BUFFERS();
- return the_state == 9 && pop(&the_json, z, MODE_DONE);
+ return jp->state == OK && pop(jp, MODE_DONE);
}
diff --git a/ext/json/JSON_parser.h b/ext/json/JSON_parser.h
index 4790f728e3..9aab62a912 100644
--- a/ext/json/JSON_parser.h
+++ b/ext/json/JSON_parser.h
@@ -1,6 +1,24 @@
-/* JSON_checker.h */
+/* JSON_parser.h */
+
+#ifndef JSON_PARSER_H
+#define JSON_PARSER_H
#include "php.h"
#include "ext/standard/php_smart_str.h"
-extern int JSON_parser(zval *z, unsigned short p[], int length, int assoc TSRMLS_DC);
+#define JSON_PARSER_MAX_DEPTH 512
+
+typedef struct JSON_parser_struct {
+ int state;
+ int depth;
+ int top;
+ int error;
+ int* stack;
+ zval *the_zstack[JSON_PARSER_MAX_DEPTH];
+
+} * JSON_parser;
+
+extern JSON_parser new_JSON_parser(int depth);
+extern int parse_JSON(JSON_parser jp, zval *z, unsigned short utf16_json[], int length, int assoc TSRMLS_DC);
+extern int free_JSON_parser(JSON_parser jp);
+#endif
diff --git a/ext/json/json.c b/ext/json/json.c
index 24afcdf560..e09027576b 100644
--- a/ext/json/json.c
+++ b/ext/json/json.c
@@ -44,6 +44,7 @@ static const char digits[] = "0123456789abcdef";
/* {{{ arginfo */
ZEND_BEGIN_ARG_INFO_EX(arginfo_json_encode, 0, 0, 1)
ZEND_ARG_INFO(0, value)
+ ZEND_ARG_INFO(0, options)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_json_decode, 0, 0, 1)
@@ -443,7 +444,7 @@ static void json_encode_r(smart_str *buf, zval *val, int options TSRMLS_DC) /* {
}
/* }}} */
-/* {{{ proto string json_encode(mixed data)
+/* {{{ proto string json_encode(mixed data [, int options])
Returns the JSON representation of a value */
static PHP_FUNCTION(json_encode)
{
@@ -463,17 +464,19 @@ static PHP_FUNCTION(json_encode)
}
/* }}} */
-/* {{{ proto mixed json_decode(string json [, bool assoc])
+/* {{{ proto mixed json_decode(string json [, bool assoc [, long depth]])
Decodes the JSON representation into a PHP value */
static PHP_FUNCTION(json_decode)
{
char *str;
int str_len, utf16_len;
zend_bool assoc = 0; /* return JS objects as PHP objects by default */
+ long depth = JSON_PARSER_MAX_DEPTH;
zval *z;
unsigned short *utf16;
+ JSON_parser jp;
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|b", &str, &str_len, &assoc) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|bl", &str, &str_len, &assoc, &depth) == FAILURE) {
return;
}
@@ -491,11 +494,15 @@ static PHP_FUNCTION(json_decode)
RETURN_NULL();
}
+ /* can be removed once we remove the max depth limit */
+ if (depth <= 0 || depth > JSON_PARSER_MAX_DEPTH) {
+ depth = JSON_PARSER_MAX_DEPTH;
+ }
+
ALLOC_INIT_ZVAL(z);
- if (JSON_parser(z, utf16, utf16_len, assoc TSRMLS_CC)) {
+ jp = new_JSON_parser(depth);
+ if (parse_JSON(jp, z, utf16, utf16_len, assoc TSRMLS_CC)) {
*return_value = *z;
- FREE_ZVAL(z);
- efree(utf16);
}
else
{
@@ -503,28 +510,30 @@ static PHP_FUNCTION(json_decode)
int type;
long p;
- zval_dtor(z);
- FREE_ZVAL(z);
- efree(utf16);
-
+ RETVAL_NULL();
if (str_len == 4) {
if (!strcasecmp(str, "null")) {
- RETURN_NULL();
+ RETVAL_NULL();
} else if (!strcasecmp(str, "true")) {
- RETURN_BOOL(1);
+ RETVAL_BOOL(1);
}
} else if (str_len == 5 && !strcasecmp(str, "false")) {
- RETURN_BOOL(0);
+ RETVAL_BOOL(0);
}
+
if ((type = is_numeric_string(str, str_len, &p, &d, 0)) != 0) {
if (type == IS_LONG) {
- RETURN_LONG(p);
+ RETVAL_LONG(p);
} else if (type == IS_DOUBLE) {
- RETURN_DOUBLE(d);
+ RETVAL_DOUBLE(d);
}
}
- RETURN_NULL();
+
+ zval_dtor(z);
}
+ FREE_ZVAL(z);
+ efree(utf16);
+ free_JSON_parser(jp);
}
/* }}} */