1 files changed, 255 insertions, 176 deletions
diff --git a/Zend/zend_language_parser.y b/Zend/zend_language_parser.y
index aa3164b735..40938256f2 100644
--- a/Zend/zend_language_parser.y
+++ b/Zend/zend_language_parser.y
@@ -86,146 +86,146 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);
 %precedence T_ELSEIF
 %precedence T_ELSE
 
-%token <ast> T_LNUMBER   "integer number (T_LNUMBER)"
-%token <ast> T_DNUMBER   "floating-point number (T_DNUMBER)"
-%token <ast> T_STRING    "identifier (T_STRING)"
-%token <ast> T_VARIABLE  "variable (T_VARIABLE)"
+%token <ast> T_LNUMBER   "integer"
+%token <ast> T_DNUMBER   "floating-point number"
+%token <ast> T_STRING    "identifier"
+%token <ast> T_VARIABLE  "variable"
 %token <ast> T_INLINE_HTML
-%token <ast> T_ENCAPSED_AND_WHITESPACE  "quoted-string and whitespace (T_ENCAPSED_AND_WHITESPACE)"
-%token <ast> T_CONSTANT_ENCAPSED_STRING "quoted-string (T_CONSTANT_ENCAPSED_STRING)"
-%token <ast> T_STRING_VARNAME "variable name (T_STRING_VARNAME)"
-%token <ast> T_NUM_STRING "number (T_NUM_STRING)"
-
-%token <ident> T_INCLUDE       "include (T_INCLUDE)"
-%token <ident> T_INCLUDE_ONCE  "include_once (T_INCLUDE_ONCE)"
-%token <ident> T_EVAL          "eval (T_EVAL)"
-%token <ident> T_REQUIRE       "require (T_REQUIRE)"
-%token <ident> T_REQUIRE_ONCE  "require_once (T_REQUIRE_ONCE)"
-%token <ident> T_LOGICAL_OR    "or (T_LOGICAL_OR)"
-%token <ident> T_LOGICAL_XOR   "xor (T_LOGICAL_XOR)"
-%token <ident> T_LOGICAL_AND   "and (T_LOGICAL_AND)"
-%token <ident> T_PRINT         "print (T_PRINT)"
-%token <ident> T_YIELD         "yield (T_YIELD)"
-%token <ident> T_YIELD_FROM    "yield from (T_YIELD_FROM)"
-%token <ident> T_INSTANCEOF    "instanceof (T_INSTANCEOF)"
-%token <ident> T_NEW           "new (T_NEW)"
-%token <ident> T_CLONE         "clone (T_CLONE)"
-%token <ident> T_EXIT          "exit (T_EXIT)"
-%token <ident> T_IF            "if (T_IF)"
-%token <ident> T_ELSEIF        "elseif (T_ELSEIF)"
-%token <ident> T_ELSE          "else (T_ELSE)"
-%token <ident> T_ENDIF         "endif (T_ENDIF)"
-%token <ident> T_ECHO          "echo (T_ECHO)"
-%token <ident> T_DO            "do (T_DO)"
-%token <ident> T_WHILE         "while (T_WHILE)"
-%token <ident> T_ENDWHILE      "endwhile (T_ENDWHILE)"
-%token <ident> T_FOR           "for (T_FOR)"
-%token <ident> T_ENDFOR        "endfor (T_ENDFOR)"
-%token <ident> T_FOREACH       "foreach (T_FOREACH)"
-%token <ident> T_ENDFOREACH    "endforeach (T_ENDFOREACH)"
-%token <ident> T_DECLARE       "declare (T_DECLARE)"
-%token <ident> T_ENDDECLARE    "enddeclare (T_ENDDECLARE)"
-%token <ident> T_AS            "as (T_AS)"
-%token <ident> T_SWITCH        "switch (T_SWITCH)"
-%token <ident> T_ENDSWITCH     "endswitch (T_ENDSWITCH)"
-%token <ident> T_CASE          "case (T_CASE)"
-%token <ident> T_DEFAULT       "default (T_DEFAULT)"
-%token <ident> T_MATCH         "match (T_MATCH)"
-%token <ident> T_BREAK         "break (T_BREAK)"
-%token <ident> T_CONTINUE      "continue (T_CONTINUE)"
-%token <ident> T_GOTO          "goto (T_GOTO)"
-%token <ident> T_FUNCTION      "function (T_FUNCTION)"
-%token <ident> T_FN            "fn (T_FN)"
-%token <ident> T_CONST         "const (T_CONST)"
-%token <ident> T_RETURN        "return (T_RETURN)"
-%token <ident> T_TRY           "try (T_TRY)"
-%token <ident> T_CATCH         "catch (T_CATCH)"
-%token <ident> T_FINALLY       "finally (T_FINALLY)"
-%token <ident> T_THROW         "throw (T_THROW)"
-%token <ident> T_USE           "use (T_USE)"
-%token <ident> T_INSTEADOF     "insteadof (T_INSTEADOF)"
-%token <ident> T_GLOBAL        "global (T_GLOBAL)"
-%token <ident> T_STATIC        "static (T_STATIC)"
-%token <ident> T_ABSTRACT      "abstract (T_ABSTRACT)"
-%token <ident> T_FINAL         "final (T_FINAL)"
-%token <ident> T_PRIVATE       "private (T_PRIVATE)"
-%token <ident> T_PROTECTED     "protected (T_PROTECTED)"
-%token <ident> T_PUBLIC        "public (T_PUBLIC)"
-%token <ident> T_VAR           "var (T_VAR)"
-%token <ident> T_UNSET         "unset (T_UNSET)"
-%token <ident> T_ISSET         "isset (T_ISSET)"
-%token <ident> T_EMPTY         "empty (T_EMPTY)"
-%token <ident> T_HALT_COMPILER "__halt_compiler (T_HALT_COMPILER)"
-%token <ident> T_CLASS         "class (T_CLASS)"
-%token <ident> T_TRAIT         "trait (T_TRAIT)"
-%token <ident> T_INTERFACE     "interface (T_INTERFACE)"
-%token <ident> T_EXTENDS       "extends (T_EXTENDS)"
-%token <ident> T_IMPLEMENTS    "implements (T_IMPLEMENTS)"
-%token <ident> T_NAMESPACE     "namespace (T_NAMESPACE)"
-%token <ident> T_LIST            "list (T_LIST)"
-%token <ident> T_ARRAY           "array (T_ARRAY)"
-%token <ident> T_CALLABLE        "callable (T_CALLABLE)"
-%token <ident> T_LINE            "__LINE__ (T_LINE)"
-%token <ident> T_FILE            "__FILE__ (T_FILE)"
-%token <ident> T_DIR             "__DIR__ (T_DIR)"
-%token <ident> T_CLASS_C         "__CLASS__ (T_CLASS_C)"
-%token <ident> T_TRAIT_C         "__TRAIT__ (T_TRAIT_C)"
-%token <ident> T_METHOD_C        "__METHOD__ (T_METHOD_C)"
-%token <ident> T_FUNC_C          "__FUNCTION__ (T_FUNC_C)"
-%token <ident> T_NS_C            "__NAMESPACE__ (T_NS_C)"
+%token <ast> T_ENCAPSED_AND_WHITESPACE  "string content"
+%token <ast> T_CONSTANT_ENCAPSED_STRING "quoted string"
+%token <ast> T_STRING_VARNAME "variable name"
+%token <ast> T_NUM_STRING "number"
+
+%token <ident> T_INCLUDE       "'include'"
+%token <ident> T_INCLUDE_ONCE  "'include_once'"
+%token <ident> T_EVAL          "'eval'"
+%token <ident> T_REQUIRE       "'require'"
+%token <ident> T_REQUIRE_ONCE  "'require_once'"
+%token <ident> T_LOGICAL_OR    "'or'"
+%token <ident> T_LOGICAL_XOR   "'xor'"
+%token <ident> T_LOGICAL_AND   "'and'"
+%token <ident> T_PRINT         "'print'"
+%token <ident> T_YIELD         "'yield'"
+%token <ident> T_YIELD_FROM    "'yield from'"
+%token <ident> T_INSTANCEOF    "'instanceof'"
+%token <ident> T_NEW           "'new'"
+%token <ident> T_CLONE         "'clone'"
+%token <ident> T_EXIT          "'exit'"
+%token <ident> T_IF            "'if'"
+%token <ident> T_ELSEIF        "'elseif'"
+%token <ident> T_ELSE          "'else'"
+%token <ident> T_ENDIF         "'endif'"
+%token <ident> T_ECHO          "'echo'"
+%token <ident> T_DO            "'do'"
+%token <ident> T_WHILE         "'while'"
+%token <ident> T_ENDWHILE      "'endwhile'"
+%token <ident> T_FOR           "'for'"
+%token <ident> T_ENDFOR        "'endfor'"
+%token <ident> T_FOREACH       "'foreach'"
+%token <ident> T_ENDFOREACH    "'endforeach'"
+%token <ident> T_DECLARE       "'declare'"
+%token <ident> T_ENDDECLARE    "'enddeclare'"
+%token <ident> T_AS            "'as'"
+%token <ident> T_SWITCH        "'switch'"
+%token <ident> T_ENDSWITCH     "'endswitch'"
+%token <ident> T_CASE          "'case'"
+%token <ident> T_DEFAULT       "'default'"
+%token <ident> T_MATCH         "'match'"
+%token <ident> T_BREAK         "'break'"
+%token <ident> T_CONTINUE      "'continue'"
+%token <ident> T_GOTO          "'goto'"
+%token <ident> T_FUNCTION      "'function'"
+%token <ident> T_FN            "'fn'"
+%token <ident> T_CONST         "'const'"
+%token <ident> T_RETURN        "'return'"
+%token <ident> T_TRY           "'try'"
+%token <ident> T_CATCH         "'catch'"
+%token <ident> T_FINALLY       "'finally'"
+%token <ident> T_THROW         "'throw'"
+%token <ident> T_USE           "'use'"
+%token <ident> T_INSTEADOF     "'insteadof'"
+%token <ident> T_GLOBAL        "'global'"
+%token <ident> T_STATIC        "'static'"
+%token <ident> T_ABSTRACT      "'abstract'"
+%token <ident> T_FINAL         "'final'"
+%token <ident> T_PRIVATE       "'private'"
+%token <ident> T_PROTECTED     "'protected'"
+%token <ident> T_PUBLIC        "'public'"
+%token <ident> T_VAR           "'var'"
+%token <ident> T_UNSET         "'unset'"
+%token <ident> T_ISSET         "'isset'"
+%token <ident> T_EMPTY         "'empty'"
+%token <ident> T_HALT_COMPILER "'__halt_compiler'"
+%token <ident> T_CLASS         "'class'"
+%token <ident> T_TRAIT         "'trait'"
+%token <ident> T_INTERFACE     "'interface'"
+%token <ident> T_EXTENDS       "'extends'"
+%token <ident> T_IMPLEMENTS    "'implements'"
+%token <ident> T_NAMESPACE     "'namespace'"
+%token <ident> T_LIST            "'list'"
+%token <ident> T_ARRAY           "'array'"
+%token <ident> T_CALLABLE        "'callable'"
+%token <ident> T_LINE            "'__LINE__'"
+%token <ident> T_FILE            "'__FILE__'"
+%token <ident> T_DIR             "'__DIR__'"
+%token <ident> T_CLASS_C         "'__CLASS__'"
+%token <ident> T_TRAIT_C         "'__TRAIT__'"
+%token <ident> T_METHOD_C        "'__METHOD__'"
+%token <ident> T_FUNC_C          "'__FUNCTION__'"
+%token <ident> T_NS_C            "'__NAMESPACE__'"
 
 %token END 0 "end of file"
-%token T_PLUS_EQUAL   "+= (T_PLUS_EQUAL)"
-%token T_MINUS_EQUAL  "-= (T_MINUS_EQUAL)"
-%token T_MUL_EQUAL    "*= (T_MUL_EQUAL)"
-%token T_DIV_EQUAL    "/= (T_DIV_EQUAL)"
-%token T_CONCAT_EQUAL ".= (T_CONCAT_EQUAL)"
-%token T_MOD_EQUAL    "%= (T_MOD_EQUAL)"
-%token T_AND_EQUAL    "&= (T_AND_EQUAL)"
-%token T_OR_EQUAL     "|= (T_OR_EQUAL)"
-%token T_XOR_EQUAL    "^= (T_XOR_EQUAL)"
-%token T_SL_EQUAL     "<<= (T_SL_EQUAL)"
-%token T_SR_EQUAL     ">>= (T_SR_EQUAL)"
-%token T_COALESCE_EQUAL "??= (T_COALESCE_EQUAL)"
-%token T_BOOLEAN_OR   "|| (T_BOOLEAN_OR)"
-%token T_BOOLEAN_AND  "&& (T_BOOLEAN_AND)"
-%token T_IS_EQUAL     "== (T_IS_EQUAL)"
-%token T_IS_NOT_EQUAL "!= (T_IS_NOT_EQUAL)"
-%token T_IS_IDENTICAL "=== (T_IS_IDENTICAL)"
-%token T_IS_NOT_IDENTICAL "!== (T_IS_NOT_IDENTICAL)"
-%token T_IS_SMALLER_OR_EQUAL "<= (T_IS_SMALLER_OR_EQUAL)"
-%token T_IS_GREATER_OR_EQUAL ">= (T_IS_GREATER_OR_EQUAL)"
-%token T_SPACESHIP "<=> (T_SPACESHIP)"
-%token T_SL "<< (T_SL)"
-%token T_SR ">> (T_SR)"
-%token T_INC "++ (T_INC)"
-%token T_DEC "-- (T_DEC)"
-%token T_INT_CAST    "(int) (T_INT_CAST)"
-%token T_DOUBLE_CAST "(double) (T_DOUBLE_CAST)"
-%token T_STRING_CAST "(string) (T_STRING_CAST)"
-%token T_ARRAY_CAST  "(array) (T_ARRAY_CAST)"
-%token T_OBJECT_CAST "(object) (T_OBJECT_CAST)"
-%token T_BOOL_CAST   "(bool) (T_BOOL_CAST)"
-%token T_UNSET_CAST  "(unset) (T_UNSET_CAST)"
-%token T_OBJECT_OPERATOR "-> (T_OBJECT_OPERATOR)"
-%token T_DOUBLE_ARROW    "=> (T_DOUBLE_ARROW)"
-%token T_COMMENT         "comment (T_COMMENT)"
-%token T_DOC_COMMENT     "doc comment (T_DOC_COMMENT)"
-%token T_OPEN_TAG        "open tag (T_OPEN_TAG)"
-%token T_OPEN_TAG_WITH_ECHO "open tag with echo (T_OPEN_TAG_WITH_ECHO)"
-%token T_CLOSE_TAG       "close tag (T_CLOSE_TAG)"
-%token T_WHITESPACE      "whitespace (T_WHITESPACE)"
-%token T_START_HEREDOC   "heredoc start (T_START_HEREDOC)"
-%token T_END_HEREDOC     "heredoc end (T_END_HEREDOC)"
-%token T_DOLLAR_OPEN_CURLY_BRACES "${ (T_DOLLAR_OPEN_CURLY_BRACES)"
-%token T_CURLY_OPEN      "{$ (T_CURLY_OPEN)"
-%token T_PAAMAYIM_NEKUDOTAYIM ":: (T_PAAMAYIM_NEKUDOTAYIM)"
-%token T_NS_SEPARATOR    "\\ (T_NS_SEPARATOR)"
-%token T_ELLIPSIS        "... (T_ELLIPSIS)"
-%token T_COALESCE        "?? (T_COALESCE)"
-%token T_POW             "** (T_POW)"
-%token T_POW_EQUAL       "**= (T_POW_EQUAL)"
-%token T_BAD_CHARACTER   "invalid character (T_BAD_CHARACTER)"
+%token T_PLUS_EQUAL   "'+='"
+%token T_MINUS_EQUAL  "'-='"
+%token T_MUL_EQUAL    "'*='"
+%token T_DIV_EQUAL    "'/='"
+%token T_CONCAT_EQUAL "'.='"
+%token T_MOD_EQUAL    "'%='"
+%token T_AND_EQUAL    "'&='"
+%token T_OR_EQUAL     "'|='"
+%token T_XOR_EQUAL    "'^='"
+%token T_SL_EQUAL     "'<<='"
+%token T_SR_EQUAL     "'>>='"
+%token T_COALESCE_EQUAL "'??='"
+%token T_BOOLEAN_OR   "'||'"
+%token T_BOOLEAN_AND  "'&&'"
+%token T_IS_EQUAL     "'=='"
+%token T_IS_NOT_EQUAL "'!='"
+%token T_IS_IDENTICAL "'==='"
+%token T_IS_NOT_IDENTICAL "'!=='"
+%token T_IS_SMALLER_OR_EQUAL "'<='"
+%token T_IS_GREATER_OR_EQUAL "'>='"
+%token T_SPACESHIP "'<=>'"
+%token T_SL "'<<'"
+%token T_SR "'>>'"
+%token T_INC "'++'"
+%token T_DEC "'--'"
+%token T_INT_CAST    "'(int)'"
+%token T_DOUBLE_CAST "'(double)'"
+%token T_STRING_CAST "'(string)'"
+%token T_ARRAY_CAST  "'(array)'"
+%token T_OBJECT_CAST "'(object)'"
+%token T_BOOL_CAST   "'(bool)'"
+%token T_UNSET_CAST  "'(unset)'"
+%token T_OBJECT_OPERATOR "'->'"
+%token T_DOUBLE_ARROW    "'=>'"
+%token T_COMMENT         "comment"
+%token T_DOC_COMMENT     "doc comment"
+%token T_OPEN_TAG        "open tag"
+%token T_OPEN_TAG_WITH_ECHO "'<?='"
+%token T_CLOSE_TAG       "'?>'"
+%token T_WHITESPACE      "whitespace"
+%token T_START_HEREDOC   "heredoc start"
+%token T_END_HEREDOC     "heredoc end"
+%token T_DOLLAR_OPEN_CURLY_BRACES "'${'"
+%token T_CURLY_OPEN      "'{$'"
+%token T_PAAMAYIM_NEKUDOTAYIM "'::'"
+%token T_NS_SEPARATOR    "'\\'"
+%token T_ELLIPSIS        "'...'"
+%token T_COALESCE        "'??'"
+%token T_POW             "'**'"
+%token T_POW_EQUAL       "'**='"
+%token T_BAD_CHARACTER   "invalid character"
 
 /* Token used to force a parse error from the lexer */
 %token T_ERROR
@@ -1438,15 +1438,16 @@ isset_variable:
 
 %%
 
-/* Copy to YYRES the contents of YYSTR after stripping away unnecessary
-   quotes and backslashes, so that it's suitable for yyerror.  The
-   heuristic is that double-quoting is unnecessary unless the string
-   contains an apostrophe, a comma, or backslash (other than
-   backslash-backslash).  YYSTR is taken from yytname.  If YYRES is
-   null, do not copy; instead, return the length of what the result
-   would have been.  */
+/* Override Bison's token-name formatting routine to give better token descriptions.
+   Copy to YYRES the contents of YYSTR for use in yyerror.
+   YYSTR is taken from yytname, from the %token declaration.
+   If YYRES is null, do not copy; instead, return the length of what
+   the result would have been.  */
 static YYSIZE_T zend_yytnamerr(char *yyres, const char *yystr)
 {
+	const char *toktype = yystr;
+	size_t toktype_len = strlen(toktype);
+
 	/* CG(parse_error) states:
 	 * 0 => yyres = NULL, yystr is the unexpected token
 	 * 1 => yyres = NULL, yystr is one of the expected tokens
@@ -1460,63 +1461,141 @@ static YYSIZE_T zend_yytnamerr(char *yyres, const char *yystr)
 	if (CG(parse_error) % 2 == 0) {
 		/* The unexpected token */
 		char buffer[120];
-		const unsigned char *end, *str, *tok1 = NULL, *tok2 = NULL;
-		unsigned int len = 0, toklen = 0, yystr_len;
+		const unsigned char *tokcontent, *tokcontent_end;
+		size_t tokcontent_len;
 
 		CG(parse_error)++;
 
 		if (LANG_SCNG(yy_text)[0] == 0 &&
 			LANG_SCNG(yy_leng) == 1 &&
-			strcmp(yystr, "\"end of file\"") == 0) {
+			strcmp(toktype, "\"end of file\"") == 0) {
 			if (yyres) {
 				yystpcpy(yyres, "end of file");
 			}
 			return sizeof("end of file")-1;
 		}
 
-		str = LANG_SCNG(yy_text);
-		end = memchr(str, '\n', LANG_SCNG(yy_leng));
-		yystr_len = (unsigned int)strlen(yystr);
+		/* The namespace separator's name holds an escaped (doubled) backslash; print a single one */
+		if (strcmp(toktype, "\"'\\\\'\"") == 0) {
+			if (yyres) {
+				yystpcpy(yyres, "token \"\\\"");
+			}
+			return sizeof("token \"\\\"")-1;
+		}
 
-		if ((tok1 = memchr(yystr, '(', yystr_len)) != NULL
-			&& (tok2 = zend_memrchr(yystr, ')', yystr_len)) != NULL) {
-			toklen = (tok2 - tok1) + 1;
-		} else {
-			tok1 = tok2 = NULL;
-			toklen = 0;
+		/* Avoid unreadable """ */
+		/* "'" would theoretically be just as bad, but is never currently parsed as a separate token */
+		if (strcmp(toktype, "'\"'") == 0) {
+			if (yyres) {
+				yystpcpy(yyres, "double-quote mark");
+			}
+			return sizeof("double-quote mark")-1;
 		}
 
-		if (end == NULL) {
-			len = LANG_SCNG(yy_leng) > 30 ? 30 : LANG_SCNG(yy_leng);
-		} else {
-			len = (end - str) > 30 ? 30 : (end - str);
+		/* Strip off the outer quote marks */
+		if (toktype_len >= 2 && *toktype == '"') {
+			toktype++;
+			toktype_len -= 2;
 		}
-		if (yyres) {
-			if (toklen) {
-				snprintf(buffer, sizeof(buffer), "'%.*s' %.*s", len, str, toklen, tok1);
-			} else {
-				snprintf(buffer, sizeof(buffer), "'%.*s'", len, str);
+
+		/* If the token always has one form, the %token line should give it a single-quoted name. */
+		/* The parser rules also use unnamed single-character tokens, which arrive here single-quoted. */
+		/* Both are re-formatted with double quotes so that the output is consistent. */
+		if (toktype_len > 0 && *toktype == '\'') {
+			if (yyres) {
+				snprintf(buffer, sizeof(buffer), "token \"%.*s\"", (int)toktype_len-2, toktype+1);
+				yystpcpy(yyres, buffer);
+			}
+			return toktype_len + sizeof("token ")-1;
+		}
+
+		/* Fetch the content of the last seen token from global lexer state */
+		tokcontent = LANG_SCNG(yy_text);
+		tokcontent_len = LANG_SCNG(yy_leng);
+
+		/* For T_BAD_CHARACTER, the content probably won't be a printable char */
+		/* Also, "unexpected invalid character" sounds a bit redundant */
+		if (tokcontent_len == 1 && strcmp(yystr, "\"invalid character\"") == 0) {
+			if (yyres) {
+				snprintf(buffer, sizeof(buffer), "character 0x%02hhX", *tokcontent);
+				yystpcpy(yyres, buffer);
+			}
+			return sizeof("character 0x00")-1;
+		}
+
+		/* Truncate at line end to avoid messing up log formats */
+		tokcontent_end = memchr(tokcontent, '\n', tokcontent_len);
+		if (tokcontent_end != NULL) {
+			tokcontent_len = (tokcontent_end - tokcontent);
+		}
+
+		/* Try to be helpful about what kind of string was found, before stripping the quotes */
+		if (tokcontent_len > 0 && strcmp(yystr, "\"quoted string\"") == 0) {
+			if (*tokcontent == '"') {
+				toktype = "double-quoted string";
+				toktype_len = sizeof("double-quoted string")-1;
 			}
+			else if (*tokcontent == '\'') {
+				toktype = "single-quoted string";
+				toktype_len = sizeof("single-quoted string")-1;
+			}
+		}
+
+		/* For quoted strings, strip off another layer of quotes to avoid putting quotes inside quotes */
+		if (tokcontent_len > 0 && (*tokcontent == '\'' || *tokcontent=='"'))  {
+			tokcontent++;
+			tokcontent_len--;
+		}
+		if (tokcontent_len > 0 && (tokcontent[tokcontent_len-1] == '\'' || tokcontent[tokcontent_len-1] == '"'))  {
+			tokcontent_len--;
+		}
+
+		/* If the content is longer than 33 characters, show only the first 30 followed by ... */
+		if (tokcontent_len > 30 + sizeof("...")-1) {
+			if (yyres) {
+				snprintf(buffer, sizeof(buffer), "%.*s \"%.*s...\"", (int)toktype_len, toktype, 30, tokcontent);
+				yystpcpy(yyres, buffer);
+			}
+			return toktype_len + 30 + sizeof(" \"...\"")-1;
+		}
+
+		if (yyres) {
+			snprintf(buffer, sizeof(buffer), "%.*s \"%.*s\"", (int)toktype_len, toktype, (int)tokcontent_len, tokcontent);
 			yystpcpy(yyres, buffer);
 		}
-		return len + (toklen ? toklen + 1 : 0) + 2;
+		return toktype_len + tokcontent_len + sizeof(" \"\"")-1;
 	}
 
 	/* One of the expected tokens */
-	if (!yyres) {
-		return strlen(yystr) - (*yystr == '"' ? 2 : 0);
+
+	/* The namespace separator's name holds an escaped (doubled) backslash; print a single one */
+	if (strcmp(toktype, "\"'\\\\'\"") == 0) {
+		if (yyres) {
+			yystpcpy(yyres, "\"\\\"");
+		}
+		return sizeof("\"\\\"")-1;
 	}
 
-	if (*yystr == '"') {
+	/* Strip off the outer quote marks */
+	if (toktype_len >= 2 && *toktype == '"') {
+		toktype++;
+		toktype_len -= 2;
+	}
+
+	if (yyres) {
 		YYSIZE_T yyn = 0;
-		const char *yyp = yystr;
 
-		for (; *++yyp != '"'; ++yyn) {
-			yyres[yyn] = *yyp;
+		for (; yyn < toktype_len; ++yyn) {
+			/* Replace single quotes with double for consistency */
+			if (toktype[yyn] == '\'') {
+				yyres[yyn] = '"';
+			}
+			else {
+				yyres[yyn] = toktype[yyn];
+			}
 		}
-		yyres[yyn] = '\0';
-		return yyn;
+		yyres[toktype_len] = '\0';
 	}
-	yystpcpy(yyres, yystr);
-	return strlen(yystr);
+
+	return toktype_len;
 }