summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoberto Ierusalimschy <roberto@inf.puc-rio.br>2021-09-03 13:14:56 -0300
committerRoberto Ierusalimschy <roberto@inf.puc-rio.br>2021-09-03 13:14:56 -0300
commit9db4bfed6bb9d5828c99c0f24749eedf54d70cc2 (patch)
tree5a7bae2573f3e08813680a2506840f2356d18cd8
parent91673a8ec0ae55e188a790bd2dfdc99246adf20e (diff)
downloadlua-github-9db4bfed6bb9d5828c99c0f24749eedf54d70cc2.tar.gz
Revamp of format validation in 'string.format'
When calling 'sprintf', not all conversion specifiers accept all flags; some combinations are undefined behavior.
-rw-r--r--lstrlib.c112
-rw-r--r--manual/manual.of6
-rw-r--r--testes/strings.lua36
3 files changed, 118 insertions, 36 deletions
diff --git a/lstrlib.c b/lstrlib.c
index 74501f78..e3b8df0f 100644
--- a/lstrlib.c
+++ b/lstrlib.c
@@ -1090,13 +1090,31 @@ static int lua_number2strx (lua_State *L, char *buff, int sz,
/* valid flags in a format specification */
-#if !defined(L_FMTFLAGS)
-#define L_FMTFLAGS "-+ #0"
+#if !defined(L_FMTFLAGSF)
+
+/* valid flags for a, A, e, E, f, F, g, and G conversions */
+#define L_FMTFLAGSF "-+#0 "
+
+/* valid flags for o, x, and X conversions */
+#define L_FMTFLAGSX "-#0"
+
+/* valid flags for d and i conversions */
+#define L_FMTFLAGSI "-+0 "
+
+/* valid flags for u conversions */
+#define L_FMTFLAGSU "-0"
+
+/* valid flags for c, p, and s conversions */
+#define L_FMTFLAGSC "-"
+
#endif
/*
-** maximum size of each format specification (such as "%-099.99d")
+** Maximum size of each format specification (such as "%-099.99d"):
+** Initial '%', flags (up to 5), width (2), period, precision (2),
+** length modifier (8), conversion specifier, and final '\0', plus some
+** extra.
*/
#define MAX_FORMAT 32
@@ -1189,25 +1207,53 @@ static void addliteral (lua_State *L, luaL_Buffer *b, int arg) {
}
-static const char *scanformat (lua_State *L, const char *strfrmt, char *form) {
- const char *p = strfrmt;
- while (*p != '\0' && strchr(L_FMTFLAGS, *p) != NULL) p++; /* skip flags */
- if ((size_t)(p - strfrmt) >= sizeof(L_FMTFLAGS)/sizeof(char))
- luaL_error(L, "invalid format (repeated flags)");
- if (isdigit(uchar(*p))) p++; /* skip width */
- if (isdigit(uchar(*p))) p++; /* (2 digits at most) */
- if (*p == '.') {
- p++;
- if (isdigit(uchar(*p))) p++; /* skip precision */
- if (isdigit(uchar(*p))) p++; /* (2 digits at most) */
+static const char *get2digits (const char *s) {
+ if (isdigit(uchar(*s))) {
+ s++;
+ if (isdigit(uchar(*s))) s++; /* (2 digits at most) */
+ }
+ return s;
+}
+
+
+/*
+** Chech whether a conversion specification is valid. When called,
+** first character in 'form' must be '%' and last character must
+** be a valid conversion specifier. 'flags' are the accepted flags;
+** 'precision' signals whether to accept a precision.
+*/
+static void checkformat (lua_State *L, const char *form, const char *flags,
+ int precision) {
+ const char *spec = form + 1; /* skip '%' */
+ spec += strspn(spec, flags); /* skip flags */
+ if (*spec != '0') { /* a width cannot start with '0' */
+ spec = get2digits(spec); /* skip width */
+ if (*spec == '.' && precision) {
+ spec++;
+ spec = get2digits(spec); /* skip precision */
+ }
}
- if (isdigit(uchar(*p)))
- luaL_error(L, "invalid format (width or precision too long)");
+ if (!isalpha(uchar(*spec))) /* did not go to the end? */
+ luaL_error(L, "invalid conversion specification: '%s'", form);
+}
+
+
+/*
+** Get a conversion specification and copy it to 'form'.
+** Return the address of its last character.
+*/
+static const char *getformat (lua_State *L, const char *strfrmt,
+ char *form) {
+ /* spans flags, width, and precision ('0' is included as a flag) */
+ size_t len = strspn(strfrmt, L_FMTFLAGSF "123456789.");
+ len++; /* adds following character (should be the specifier) */
+ /* still needs space for '%', '\0', plus a length modifier */
+ if (len >= MAX_FORMAT - 10)
+ luaL_error(L, "invalid format (too long)");
*(form++) = '%';
- memcpy(form, strfrmt, ((p - strfrmt) + 1) * sizeof(char));
- form += (p - strfrmt) + 1;
- *form = '\0';
- return p;
+ memcpy(form, strfrmt, len * sizeof(char));
+ *(form + len) = '\0';
+ return strfrmt + len - 1;
}
@@ -1230,6 +1276,7 @@ static int str_format (lua_State *L) {
size_t sfl;
const char *strfrmt = luaL_checklstring(L, arg, &sfl);
const char *strfrmt_end = strfrmt+sfl;
+ const char *flags;
luaL_Buffer b;
luaL_buffinit(L, &b);
while (strfrmt < strfrmt_end) {
@@ -1239,25 +1286,35 @@ static int str_format (lua_State *L) {
luaL_addchar(&b, *strfrmt++); /* %% */
else { /* format item */
char form[MAX_FORMAT]; /* to store the format ('%...') */
- int maxitem = MAX_ITEM;
- char *buff = luaL_prepbuffsize(&b, maxitem); /* to put formatted item */
- int nb = 0; /* number of bytes in added item */
+ int maxitem = MAX_ITEM; /* maximum length for the result */
+ char *buff = luaL_prepbuffsize(&b, maxitem); /* to put result */
+ int nb = 0; /* number of bytes in result */
if (++arg > top)
return luaL_argerror(L, arg, "no value");
- strfrmt = scanformat(L, strfrmt, form);
+ strfrmt = getformat(L, strfrmt, form);
switch (*strfrmt++) {
case 'c': {
+ checkformat(L, form, L_FMTFLAGSC, 0);
nb = l_sprintf(buff, maxitem, form, (int)luaL_checkinteger(L, arg));
break;
}
case 'd': case 'i':
- case 'o': case 'u': case 'x': case 'X': {
+ flags = L_FMTFLAGSI;
+ goto intcase;
+ case 'u':
+ flags = L_FMTFLAGSU;
+ goto intcase;
+ case 'o': case 'x': case 'X':
+ flags = L_FMTFLAGSX;
+ intcase: {
lua_Integer n = luaL_checkinteger(L, arg);
+ checkformat(L, form, flags, 1);
addlenmod(form, LUA_INTEGER_FRMLEN);
nb = l_sprintf(buff, maxitem, form, (LUAI_UACINT)n);
break;
}
case 'a': case 'A':
+ checkformat(L, form, L_FMTFLAGSF, 1);
addlenmod(form, LUA_NUMBER_FRMLEN);
nb = lua_number2strx(L, buff, maxitem, form,
luaL_checknumber(L, arg));
@@ -1268,12 +1325,14 @@ static int str_format (lua_State *L) {
/* FALLTHROUGH */
case 'e': case 'E': case 'g': case 'G': {
lua_Number n = luaL_checknumber(L, arg);
+ checkformat(L, form, L_FMTFLAGSF, 1);
addlenmod(form, LUA_NUMBER_FRMLEN);
nb = l_sprintf(buff, maxitem, form, (LUAI_UACNUMBER)n);
break;
}
case 'p': {
const void *p = lua_topointer(L, arg);
+ checkformat(L, form, L_FMTFLAGSC, 0);
if (p == NULL) { /* avoid calling 'printf' with argument NULL */
p = "(null)"; /* result */
form[strlen(form) - 1] = 's'; /* format it as a string */
@@ -1294,7 +1353,8 @@ static int str_format (lua_State *L) {
luaL_addvalue(&b); /* keep entire string */
else {
luaL_argcheck(L, l == strlen(s), arg, "string contains zeros");
- if (!strchr(form, '.') && l >= 100) {
+ checkformat(L, form, L_FMTFLAGSC, 1);
+ if (strchr(form, '.') == NULL && l >= 100) {
/* no precision and string is too long to be formatted */
luaL_addvalue(&b); /* keep entire string */
}
diff --git a/manual/manual.of b/manual/manual.of
index 664b5c1e..ea9a0302 100644
--- a/manual/manual.of
+++ b/manual/manual.of
@@ -7078,8 +7078,10 @@ following the description given in its first argument,
which must be a string.
The format string follows the same rules as the @ANSI{sprintf}.
The only differences are that the conversion specifiers and modifiers
-@T{*}, @id{h}, @id{L}, @id{l}, and @id{n} are not supported
+@id{F}, @id{n}, @T{*}, @id{h}, @id{L}, and @id{l} are not supported
and that there is an extra specifier, @id{q}.
+Both width and precision, when present,
+are limited to two digits.
The specifier @id{q} formats booleans, nil, numbers, and strings
in a way that the result is a valid constant in Lua source code.
@@ -7099,7 +7101,7 @@ may produce the string:
"a string with \"quotes\" and \
new line"
}
-This specifier does not support modifiers (flags, width, length).
+This specifier does not support modifiers (flags, width, precision).
The conversion specifiers
@id{A}, @id{a}, @id{E}, @id{e}, @id{f},
diff --git a/testes/strings.lua b/testes/strings.lua
index 61a06a25..184fa651 100644
--- a/testes/strings.lua
+++ b/testes/strings.lua
@@ -202,13 +202,11 @@ assert(string.format("\0%c\0%c%x\0", string.byte("\xe4"), string.byte("b"), 140)
"\0\xe4\0b8c\0")
assert(string.format('') == "")
assert(string.format("%c",34)..string.format("%c",48)..string.format("%c",90)..string.format("%c",100) ==
- string.format("%c%c%c%c", 34, 48, 90, 100))
+ string.format("%1c%-c%-1c%c", 34, 48, 90, 100))
assert(string.format("%s\0 is not \0%s", 'not be', 'be') == 'not be\0 is not \0be')
assert(string.format("%%%d %010d", 10, 23) == "%10 0000000023")
assert(tonumber(string.format("%f", 10.3)) == 10.3)
-x = string.format('"%-50s"', 'a')
-assert(#x == 52)
-assert(string.sub(x, 1, 4) == '"a ')
+assert(string.format('"%-50s"', 'a') == '"a' .. string.rep(' ', 49) .. '"')
assert(string.format("-%.20s.20s", string.rep("%", 2000)) ==
"-"..string.rep("%", 20)..".20s")
@@ -237,7 +235,6 @@ end
assert(string.format("\0%s\0", "\0\0\1") == "\0\0\0\1\0")
checkerror("contains zeros", string.format, "%10s", "\0")
-checkerror("cannot have modifiers", string.format, "%10q", "1")
-- format x tostring
assert(string.format("%s %s", nil, true) == "nil true")
@@ -341,6 +338,21 @@ do print("testing 'format %a %A'")
end
+-- testing some flags (all these results are required by ISO C)
+assert(string.format("%#12o", 10) == " 012")
+assert(string.format("%#10x", 100) == " 0x64")
+assert(string.format("%#-17X", 100) == "0X64 ")
+assert(string.format("%013i", -100) == "-000000000100")
+assert(string.format("%2.5d", -100) == "-00100")
+assert(string.format("%.u", 0) == "")
+assert(string.format("%+#014.0f", 100) == "+000000000100.")
+assert(string.format("% 1.0E", 100) == " 1E+02")
+assert(string.format("%-16c", 97) == "a ")
+assert(string.format("%+.3G", 1.5) == "+1.5")
+assert(string.format("% .1g", 2^10) == " 1e+03")
+assert(string.format("%.0s", "alo") == "")
+assert(string.format("%.s", "alo") == "")
+
-- errors in format
local function check (fmt, msg)
@@ -348,13 +360,21 @@ local function check (fmt, msg)
end
local aux = string.rep('0', 600)
-check("%100.3d", "too long")
+check("%100.3d", "invalid conversion")
check("%1"..aux..".3d", "too long")
-check("%1.100d", "too long")
+check("%1.100d", "invalid conversion")
check("%10.1"..aux.."004d", "too long")
check("%t", "invalid conversion")
-check("%"..aux.."d", "repeated flags")
+check("%"..aux.."d", "too long")
check("%d %d", "no value")
+check("%010c", "invalid conversion")
+check("%.10c", "invalid conversion")
+check("%0.34s", "invalid conversion")
+check("%#i", "invalid conversion")
+check("%3.1p", "invalid conversion")
+check("%0.s", "invalid conversion")
+check("%10q", "cannot have modifiers")
+check("%F", "invalid conversion") -- useless and not in C89
assert(load("return 1\n--comment without ending EOL")() == 1)