From 02060b7a37d88d4e92cf64a008c0651eae432c12 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Tue, 23 Aug 2022 16:08:53 -0300 Subject: Simpler handling of Byte Order Mark (BOM) --- lauxlib.c | 47 ++++++++++++++++++++++++++--------------------- testes/main.lua | 35 ++++++++++++++++++++++++++++++----- 2 files changed, 56 insertions(+), 26 deletions(-) diff --git a/lauxlib.c b/lauxlib.c index 413d8f97..cba5df9b 100644 --- a/lauxlib.c +++ b/lauxlib.c @@ -740,17 +740,18 @@ static int errfile (lua_State *L, const char *what, int fnameindex) { } -static int skipBOM (LoadF *lf) { - const char *p = "\xEF\xBB\xBF"; /* UTF-8 BOM mark */ - int c; - lf->n = 0; - do { - c = getc(lf->f); - if (c == EOF || c != *(const unsigned char *)p++) return c; - lf->buff[lf->n++] = c; /* to be read by the parser */ - } while (*p != '\0'); - lf->n = 0; /* prefix matched; discard it */ - return getc(lf->f); /* return next character */ +/* +** Skip an optional BOM at the start of a stream. If there is an +** incomplete BOM (the first character is correct but the rest is +** not), returns the first character anyway to force an error +** (as no chunk can start with 0xEF). +*/ +static int skipBOM (FILE *f) { + int c = getc(f); /* read first character */ + if (c == 0xEF && getc(f) == 0xBB && getc(f) == 0xBF) /* correct BOM? */ + return getc(f); /* ignore BOM and return next char */ + else /* no (valid) BOM */ + return c; /* return first character */ } @@ -761,13 +762,13 @@ static int skipBOM (LoadF *lf) { ** first "valid" character of the file (after the optional BOM and ** a first-line comment). */ -static int skipcomment (LoadF *lf, int *cp) { - int c = *cp = skipBOM(lf); +static int skipcomment (FILE *f, int *cp) { + int c = *cp = skipBOM(f); if (c == '#') { /* first line is a comment (Unix exec. file)? */ do { /* skip first line */ - c = getc(lf->f); + c = getc(f); } while (c != EOF && c != '\n'); - *cp = getc(lf->f); /* skip end-of-line, if present */ + *cp = getc(f); /* next character after comment, if present */ return 1; /* there was a comment */ } else return 0; /* no comment */ @@ -789,12 +790,16 @@ LUALIB_API int luaL_loadfilex (lua_State *L, const char *filename, lf.f = fopen(filename, "r"); if (lf.f == NULL) return errfile(L, "open", fnameindex); } - if (skipcomment(&lf, &c)) /* read initial portion */ - lf.buff[lf.n++] = '\n'; /* add line to correct line numbers */ - if (c == LUA_SIGNATURE[0] && filename) { /* binary file? */ - lf.f = freopen(filename, "rb", lf.f); /* reopen in binary mode */ - if (lf.f == NULL) return errfile(L, "reopen", fnameindex); - skipcomment(&lf, &c); /* re-read initial portion */ + lf.n = 0; + if (skipcomment(lf.f, &c)) /* read initial portion */ + lf.buff[lf.n++] = '\n'; /* add newline to correct line numbers */ + if (c == LUA_SIGNATURE[0]) { /* binary file? */ + lf.n = 0; /* remove possible newline */ + if (filename) { /* "real" file? */ + lf.f = freopen(filename, "rb", lf.f); /* reopen in binary mode */ + if (lf.f == NULL) return errfile(L, "reopen", fnameindex); + skipcomment(lf.f, &c); /* re-read initial portion */ + } } if (c != EOF) lf.buff[lf.n++] = c; /* 'c' is the first character of the stream */ diff --git a/testes/main.lua b/testes/main.lua index 9def6386..9187420e 100644 --- a/testes/main.lua +++ b/testes/main.lua @@ -94,6 +94,33 @@ RUN('echo "print(10)\nprint(2)\n" | lua > %s', out) checkout("10\n2\n") +-- testing BOM +prepfile("\xEF\xBB\xBF") +RUN('lua %s > %s', prog, out) +checkout("") + +prepfile("\xEF\xBB\xBFprint(3)") +RUN('lua %s > %s', prog, out) +checkout("3\n") + +prepfile("\xEF\xBB\xBF# comment!!\nprint(3)") +RUN('lua %s > %s', prog, out) +checkout("3\n") + +-- bad BOMs +prepfile("\xEF") +NoRun("unexpected symbol", 'lua %s > %s', prog, out) + +prepfile("\xEF\xBB") +NoRun("unexpected symbol", 'lua %s > %s', prog, out) + +prepfile("\xEFprint(3)") +NoRun("unexpected symbol", 'lua %s > %s', prog, out) + +prepfile("\xEF\xBBprint(3)") +NoRun("unexpected symbol", 'lua %s > %s', prog, out) + + -- test option '-' RUN('echo "print(arg[1])" | lua - -h > %s', out) checkout("-h\n") @@ -385,12 +412,10 @@ checkprogout("101\n13\t22\n\n") prepfile[[#comment in 1st line without \n at the end]] RUN('lua %s', prog) -prepfile[[#test line number when file starts with comment line -debug = require"debug" -print(debug.getinfo(1).currentline) -]] +-- first-line comment with binary file +prepfile("#comment\n" .. string.dump(load("print(3)"))) RUN('lua %s > %s', prog, out) -checkprogout('3\n') +checkout('3\n') -- close Lua with an open file prepfile(string.format([[io.output(%q); io.write('alo')]], out)) -- cgit v1.2.1