diff options
author | rrt <rrt> | 2010-07-19 20:07:17 +0000 |
---|---|---|
committer | rrt <rrt> | 2010-07-19 20:07:17 +0000 |
commit | cf705e859f23a3477616c6f3080ecda313b67766 (patch) | |
tree | d09d8b6d36874b2114f538e5175fd514b12e7092 | |
parent | 6dad4d41c35340b4c0e0d0ecd011252129f907a9 (diff) | |
download | lrexlib-cf705e859f23a3477616c6f3080ecda313b67766.tar.gz |
Add a new regex engine: GNU regex.
This check-in is only a first rough cut of the new code; it passes the
existing tests, which have been rewritten for Emacs syntax in
emacs_sets.lua.
Documentation and complete code to expose the unique features of GNU
regex are yet to come.
-rwxr-xr-x | Makefile | 15 | ||||
-rwxr-xr-x | src/gnu/.cvsignore | 1 | ||||
-rwxr-xr-x | src/gnu/lgnu.c | 272 | ||||
-rwxr-xr-x | src/gnu/rex_gnu.mak | 48 | ||||
-rwxr-xr-x | test/emacs_sets.lua | 342 | ||||
-rwxr-xr-x | test/runtest.lua | 2 |
6 files changed, 677 insertions, 3 deletions
@@ -2,8 +2,9 @@ # See src/*.mak for user-definable settings -POSIX = src/posix +GNU = src/gnu PCRE = src/pcre +POSIX = src/posix ONIG = src/oniguruma all: build test @@ -14,6 +15,9 @@ test: test_pcre test_posix test_onig clean: clean_pcre clean_posix clean_onig +build_gnu: + make -C $(GNU) -f rex_gnu.mak + build_pcre: make -C $(PCRE) -f rex_pcre.mak @@ -23,6 +27,9 @@ build_posix: build_onig: make -C $(ONIG) -f rex_onig.mak +test_gnu: + cd test && lua ./runtest.lua -d../$(GNU) gnu + test_pcre: cd test && lua ./runtest.lua -d../$(PCRE) pcre @@ -32,6 +39,9 @@ test_posix: test_onig: cd test && lua ./runtest.lua -d../$(ONIG) onig +clean_gnu: + make -C $(GNU) -f rex_gnu.mak clean + clean_pcre: make -C $(PCRE) -f rex_pcre.mak clean @@ -41,5 +51,6 @@ clean_posix: clean_onig: make -C $(ONIG) -f rex_onig.mak clean -.PHONY: all build test clean build_pcre test_pcre clean_pcre build_posix \ +.PHONY: all build test clean build_gnu test_gnu clean_gnu \ + build_pcre test_pcre clean_pcre build_posix \ test_posix clean_posix build_onig test_onig clean_onig diff --git a/src/gnu/.cvsignore b/src/gnu/.cvsignore new file mode 100755 index 0000000..25d32ea --- /dev/null +++ b/src/gnu/.cvsignore @@ -0,0 +1 @@ +*.so.*
\ No newline at end of file diff --git a/src/gnu/lgnu.c b/src/gnu/lgnu.c new file mode 100755 index 0000000..0a5cab3 --- /dev/null +++ b/src/gnu/lgnu.c @@ -0,0 +1,272 @@ +/* lgnu.c - Lua binding of GNU regular expressions library */ +/* See Copyright Notice in the file LICENSE */ + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include "lua.h" +#include "lauxlib.h" +#include "../common.h" + +#define __USE_GNU +#ifndef REX_GNU_INCLUDE +# include <regex.h> +#else +# include REX_GNU_INCLUDE +#endif + +/* These 2 settings may be redefined from the command-line or the makefile. + * They should be kept in sync between themselves and with the target name. + */ +#ifndef REX_LIBNAME +# define REX_LIBNAME "rex_gnu" +#endif +#ifndef REX_OPENLIB +# define REX_OPENLIB luaopen_rex_gnu +#endif + +#define REX_TYPENAME REX_LIBNAME"_regex" + +static int getcflags (lua_State *L, int pos); +#define ALG_GETCFLAGS(L,pos) getcflags(L, pos) + +#define ALG_CFLAGS_DFLT 0 +#define ALG_EFLAGS_DFLT 0 +/* FIXME: Need to be able to set the following fields, so treat eflags like cflags: + unsigned __REPB_PREFIX(no_sub) : 1; + unsigned __REPB_PREFIX(not_bol) : 1; + unsigned __REPB_PREFIX(not_eol) : 1; + unsigned __REPB_PREFIX(newline_anchor) : 1; +*/ + +#define ALG_NOMATCH -1 /* FIXME: -2 for internal error is also possible; take arg like ALG_ISMATCH */ +#define ALG_ISMATCH(res) ((res) >= 0) +#define ALG_SUBBEG(ud,n) ud->match.start[n] +#define ALG_SUBEND(ud,n) ud->match.end[n] +#define ALG_SUBLEN(ud,n) (ALG_SUBEND(ud,n) - ALG_SUBBEG(ud,n)) +#define ALG_SUBVALID(ud,n) (ALG_SUBBEG(ud,n) >= 0) +#define ALG_NSUB(ud) ((int)ud->r.re_nsub) + +#define ALG_PUSHSUB(L,ud,text,n) \ + lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n)) + +#define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \ + (ALG_SUBVALID(ud,n) ? 
ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) + +#define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1) +#define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n)) +#define ALG_PUSHOFFSETS(L,ud,offs,n) \ + (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n)) + +#define ALG_BASE(st) (st) + +typedef struct { + struct re_pattern_buffer r; + struct re_registers match; + int freed; + const char * errmsg; +} TGnu; + +#define TUserdata TGnu + +#include "../algo.h" + +/* Functions + ****************************************************************************** + */ + +static int getcflags (lua_State *L, int pos) { + switch (lua_type (L, pos)) { + case LUA_TNONE: + case LUA_TNIL: + return ALG_CFLAGS_DFLT; + default: + return luaL_typeerror (L, pos, "FIXME: translation tables not yet implemented"); + } +} + +/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes + RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this + isn't minimal, since other operators, such as \`, aren't disabled. */ +#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) + +#define RE_SYNTAX_POSIX_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) + +/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is + removed and RE_NO_BK_REFS is added. 
*/ +#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) + +static int generate_error (lua_State *L, const TUserdata *ud, int errcode) { + const char *errmsg; + switch (errcode) { + case 0: + errmsg = ud->errmsg; + break; + case -1: + errmsg = "no match"; + break; + case -2: + errmsg = "internal error in GNU regex"; + break; + default: + errmsg = "internal error in lrexlib"; + } + return luaL_error (L, "%s", errmsg); +} + +static int compile_regex (lua_State *L, const TArgComp *argC, TGnu **pud) { + const char *res; + TGnu *ud; + reg_syntax_t old_syntax; + int ret; + + ud = (TGnu *)lua_newuserdata (L, sizeof (TGnu)); + memset (ud, 0, sizeof (TGnu)); /* initialize all members to 0 */ + + /* translate table is never written to, so this cast is safe */ + /* FIXME: ud->r.translate = (unsigned char *) translate; */ + + old_syntax = re_set_syntax (RE_SYNTAX_EMACS); /* FIXME: take syntax parameter in cflags */ + res = re_compile_pattern (argC->pattern, argC->patlen, &ud->r); + if (res != NULL) { + ud->errmsg = res; + ret = generate_error (L, ud, 0); + } else { + if (argC->cflags & REG_NOSUB) + ud->r.no_sub = 1; + + lua_pushvalue (L, LUA_ENVIRONINDEX); + lua_setmetatable (L, -2); + + if (pud) *pud = ud; + ret = 1; + } + + re_set_syntax (old_syntax); + return ret; +} + +static int gmatch_exec (TUserdata *ud, TArgExec *argE) { + if (argE->startoffset > 0) + ud->r.not_bol = 1; + argE->text += argE->startoffset; + argE->textlen -= argE->startoffset; + return re_search (&ud->r, argE->text, argE->textlen, 0, argE->textlen, &ud->match); +} + +static void gmatch_pushsubject (lua_State *L, TArgExec *argE) { + lua_pushlstring (L, argE->text, argE->textlen); +} + +static int findmatch_exec (TGnu *ud, TArgExec *argE) { + argE->text += argE->startoffset; + argE->textlen -= argE->startoffset; + return 
re_search (&ud->r, argE->text, argE->textlen, 0, argE->textlen, &ud->match); +} + +static int gsub_exec (TGnu *ud, TArgExec *argE, int st) { + if (st > 0) + ud->r.not_bol = 1; + return re_search (&ud->r, argE->text + st, argE->textlen - st, 0, argE->textlen - st, &ud->match); +} + +static int split_exec (TGnu *ud, TArgExec *argE, int offset) { + if (offset > 0) + ud->r.not_bol = 1; + return re_search (&ud->r, argE->text + offset, argE->textlen - offset, 0, argE->textlen - offset, &ud->match); +} + +static int Gnu_gc (lua_State *L) { + TGnu *ud = check_ud (L); + if (ud->freed == 0) { /* precaution against "manual" __gc calling */ + ud->freed = 1; + if (ud->r.regs_allocated != REGS_UNALLOCATED) { + free (ud->match.start); + free (ud->match.end); + } + } + return 0; +} + +static int Gnu_tostring (lua_State *L) { + TGnu *ud = check_ud (L); + if (ud->freed == 0) + lua_pushfstring (L, "%s (%p)", REX_TYPENAME, (void*)ud); + else + lua_pushfstring (L, "%s (deleted)", REX_TYPENAME); + return 1; +} + +static flag_pair gnu_flags[] = +{ + { "SYNTAX_EMACS", RE_SYNTAX_EMACS }, + { "SYNTAX_AWK", RE_SYNTAX_AWK }, + { "SYNTAX_GNU_AWK", RE_SYNTAX_GNU_AWK }, + { "SYNTAX_POSIX_AWK", RE_SYNTAX_POSIX_AWK }, + { "SYNTAX_GREP", RE_SYNTAX_GREP }, + { "SYNTAX_EGREP", RE_SYNTAX_EGREP }, + { "SYNTAX_POSIX_EGREP", RE_SYNTAX_POSIX_EGREP }, + { "SYNTAX_ED", RE_SYNTAX_ED }, + { "SYNTAX_SED", RE_SYNTAX_SED }, + { "SYNTAX_POSIX_BASIC", RE_SYNTAX_POSIX_BASIC }, +/*---------------------------------------------------------------------------*/ + { NULL, 0 } +}; + +static int Gnu_get_flags (lua_State *L) { + const flag_pair* fps[] = { gnu_flags, NULL }; + return get_flags (L, fps); +} + +static const luaL_reg gnumeta[] = { + { "exec", ud_exec }, + { "tfind", ud_tfind }, /* old match */ + /* { "trfind", ud_trfind }, */ + { "find", ud_find }, + /* { "rfind", ud_rfind }, */ + { "match", ud_match }, + { "__gc", Gnu_gc }, + { "__tostring", Gnu_tostring }, + { NULL, NULL} +}; + +static const luaL_reg rexlib[] 
= { + { "match", match }, + { "find", find }, + /* { "rfind", rfind }, */ + { "gmatch", gmatch }, + { "gsub", gsub }, + { "split", split }, + { "new", ud_new }, + { "flags", Gnu_get_flags }, + { "plainfind", plainfind_func }, + /* { "set_syntax", set_syntax }, */ + { NULL, NULL } +}; + +/* Open the library */ +REX_API int REX_OPENLIB (lua_State *L) +{ + /* create a new function environment to serve as a metatable for methods */ + lua_newtable (L); + lua_pushvalue (L, -1); + lua_replace (L, LUA_ENVIRONINDEX); + lua_pushvalue(L, -1); /* mt.__index = mt */ + lua_setfield(L, -2, "__index"); + luaL_register (L, NULL, gnumeta); + + /* register functions */ + luaL_register (L, REX_LIBNAME, rexlib); + lua_pushliteral (L, REX_VERSION" (for GNU regexes)"); + lua_setfield (L, -2, "_VERSION"); + return 1; +} diff --git a/src/gnu/rex_gnu.mak b/src/gnu/rex_gnu.mak new file mode 100755 index 0000000..92dac03 --- /dev/null +++ b/src/gnu/rex_gnu.mak @@ -0,0 +1,48 @@ +# makefile for rex_gnu library + +include ../defaults.mak + +# === USER SETTINGS === +# =========================================================================== + +# These are default values. +INC = +LIB = + +# If the default settings don't work for your system, +# try to uncomment and edit the settings below. +#INC = +#LIB = -lc + +# WARNING: +# If you want to use a GNU regex library that is not the system +# default, make sure you set both the INC and LIB variables correctly, +# as if a header file and library are used which do not match, you may +# well get segmentation faults (or worse). + +# The following lines work for the rxspencer library, when installed +# under /usr (note the above warning!) 
+#INC = -I/usr/include/rxspencer +#LIB = -lrxspencer + +# Target name +TRG = rex_gnu + +# =========================================================================== +# === END OF USER SETTINGS === + +OBJ = lgnu.o ../common.o + +include ../common.mak + +# static GNU regexp library binding +ar_gnu: $(TRG_AR) + +# dynamic GNU regexp library binding +so_gnu: $(TRG_SO) + +# Dependencies +lgnu.o: lgnu.c ../common.h ../algo.h +../common.o: ../common.c ../common.h + +# (End of Makefile) diff --git a/test/emacs_sets.lua b/test/emacs_sets.lua new file mode 100755 index 0000000..7f74595 --- /dev/null +++ b/test/emacs_sets.lua @@ -0,0 +1,342 @@ +-- See Copyright Notice in the file LICENSE + +-- This file contains the existing test sets rewritten in Emacs regex +-- syntax; runtest.lua runs them against the rex_gnu library. + +local luatest = require "luatest" +local N = luatest.NT + +local function norm(a) return a==nil and N or a end + +local function get_gsub (lib) + return lib.gsub or + function (subj, pattern, repl, n) + return lib.new (pattern) : gsub (subj, repl, n) + end +end + +local function set_f_gmatch (lib, flg) + -- gmatch (s, p, [cf], [ef]) + local function test_gmatch (subj, patt) + local out, guard = {}, 10 + for a, b in lib.gmatch (subj, patt) do + table.insert (out, { norm(a), norm(b) }) + guard = guard - 1 + if guard == 0 then break end + end + return unpack (out) + end + return { + Name = "Function gmatch", + Func = test_gmatch, + --{ subj patt results } + { {"ab", lib.new"."}, {{"a",N}, {"b",N} } }, + { {("abcd"):rep(3), "\\(.\\)b.\\(d\\)"}, {{"a","d"},{"a","d"},{"a","d"}} }, + { {"abcd", ".*" }, {{"abcd",N},{"",N} } },--zero-length match + { {"abc", "^." 
}, {{"a",N}} },--anchored pattern + } +end + +local function set_f_split (lib, flg) + -- split (s, p, [cf], [ef]) + local function test_split (subj, patt) + local out, guard = {}, 10 + for a, b, c in lib.split (subj, patt) do + table.insert (out, { norm(a), norm(b), norm(c) }) + guard = guard - 1 + if guard == 0 then break end + end + return unpack (out) + end + return { + Name = "Function split", + Func = test_split, + --{ subj patt results } + { {"ab", lib.new","}, {{"ab",N,N}, } }, + { {"ab", ","}, {{"ab",N,N}, } }, + { {",", ","}, {{"",",",N}, {"", N, N}, } }, + { {",,", ","}, {{"",",",N}, {"",",",N}, {"",N,N} } }, + { {"a,b", ","}, {{"a",",",N}, {"b",N,N}, } }, + { {",a,b", ","}, {{"",",",N}, {"a",",",N}, {"b",N,N}} }, + { {"a,b,", ","}, {{"a",",",N}, {"b",",",N}, {"",N,N} } }, + { {"a,,b", ","}, {{"a",",",N}, {"",",",N}, {"b",N,N}} }, + { {"ab<78>c", "<\\(.\\)\\(.\\)>"}, {{"ab","7","8"}, {"c",N,N}, } }, + { {"abc", "^."}, {{"", "a",N}, {"bc",N,N}, } },--anchored pattern + { {"abc", "^"}, {{"", "", N}, {"abc",N,N}, } }, +-- { {"abc", "$"}, {{"abc","",N}, {"",N,N}, } }, +-- { {"abc", "^|$"}, {{"", "", N}, {"abc","",N},{"",N,N},} }, + } +end + +local function set_f_find (lib, flg) + return { + Name = "Function find", + Func = lib.find, + -- {subj, patt, st}, { results } + { {"abcd", lib.new".+"}, { 1,4 } }, -- [none] + { {"abcd", ".+"}, { 1,4 } }, -- [none] + { {"abcd", ".+", 2}, { 2,4 } }, -- positive st + { {"abcd", ".+", -2}, { 3,4 } }, -- negative st + { {"abcd", ".*"}, { 1,4 } }, -- [none] + { {"abc", "bc"}, { 2,3 } }, -- [none] + { {"abcd", "\\(.\\)b.\\(d\\)"}, { 1,4,"a","d" }}, -- [captures] + } +end + +local function set_f_match (lib, flg) + return { + Name = "Function match", + Func = lib.match, + -- {subj, patt, st}, { results } + { {"abcd", lib.new".+"}, {"abcd"} }, -- [none] + { {"abcd", ".+"}, {"abcd"} }, -- [none] + { {"abcd", ".+", 2}, {"bcd"} }, -- positive st + { {"abcd", ".+", -2}, {"cd"} }, -- negative st + { {"abcd", ".*"}, {"abcd"} }, -- 
[none] + { {"abc", "bc"}, {"bc"} }, -- [none] + { {"abcd", "\\(.\\)b.\\(d\\)"}, {"a","d"} }, -- [captures] + } +end + +local function set_m_exec (lib, flg) + return { + Name = "Method exec", + Method = "exec", + --{patt}, {subj, st} { results } + { {".+"}, {"abcd"}, {1,4,{}} }, -- [none] + { {".+"}, {"abcd",2}, {2,4,{}} }, -- positive st + { {".+"}, {"abcd",-2}, {3,4,{}} }, -- negative st + { {".*"}, {"abcd"}, {1,4,{}} }, -- [none] + { {"bc"}, {"abc"}, {2,3,{}} }, -- [none] + { { "\\(.\\)b.\\(d\\)"}, {"abcd"}, {1,4,{1,1,4,4}}},--[captures] + { {"\\(a+\\)6+\\(b+\\)"}, {"Taa66bbT",2}, {2,7,{2,3,6,7}}},--[st+captures] + } +end + +local function set_m_tfind (lib, flg) + return { + Name = "Method tfind", + Method = "tfind", + --{patt}, {subj, st} { results } + { {".+"}, {"abcd"}, {1,4,{}} }, -- [none] + { {".+"}, {"abcd",2}, {2,4,{}} }, -- positive st + { {".+"}, {"abcd",-2}, {3,4,{}} }, -- negative st + { {".*"}, {"abcd"}, {1,4,{}} }, -- [none] + { {"bc"}, {"abc"}, {2,3,{}} }, -- [none] + { {"\\(.\\)b.\\(d\\)"}, {"abcd"}, {1,4,{"a","d"}}},--[captures] + } +end + +local function set_m_find (lib, flg) + return { + Name = "Method find", + Method = "find", + --{patt}, {subj, st} { results } + { {".+"}, {"abcd"}, {1,4} }, -- [none] + { {".+"}, {"abcd",2}, {2,4} }, -- positive st + { {".+"}, {"abcd",-2}, {3,4} }, -- negative st + { {".*"}, {"abcd"}, {1,4} }, -- [none] + { {"bc"}, {"abc"}, {2,3} }, -- [none] + { {"\\(.\\)b.\\(d\\)"}, {"abcd"}, {1,4,"a","d"}},--[captures] + } +end + +local function set_m_match (lib, flg) + return { + Name = "Method match", + Method = "match", + --{patt}, {subj, st} { results } + { {".+"}, {"abcd"}, {"abcd"} }, -- [none] + { {".+"}, {"abcd",2}, {"bcd" } }, -- positive st + { {".+"}, {"abcd",-2}, {"cd" } }, -- negative st + { {".*"}, {"abcd"}, {"abcd"} }, -- [none] + { {"bc"}, {"abc"}, {"bc" } }, -- [none] + {{ "\\(.\\)b.\\(d\\)"}, {"abcd"}, {"a","d"} }, --[captures] + } +end + +local function set_f_plainfind (lib, flg) + return { + Name = 
"Function plainfind", + Func = lib.plainfind, + --{ subj, patt, st, ci } { results } + { {"abcd", "bc"}, {2,3} }, -- [none] + { {"abcd", "dc"}, {N} }, -- [none] + { {"abcd", "cd"}, {3,4} }, -- positive st + { {"abcd", "cd", 3}, {3,4} }, -- positive st + { {"abcd", "cd", 4}, {N} }, -- failing st + { {"abcd", "bc", 2}, {2,3} }, -- positive st + { {"abcd", "bc", -4}, {2,3} }, -- negative st + { {"abcd", "bc", 3}, {N} }, -- failing st + { {"abcd", "BC"}, {N} }, -- case sensitive + { {"abcd", "BC", N, true}, {2,3} }, -- case insensitive + { {"ab\0cd", "b\0c"}, {2,4} }, -- contains nul + { {"abcd", "", 1}, {1,0} }, -- empty pattern + { {"abcd", "", 5}, {5,4} }, -- empty pattern + { {"abcd", "", 6}, {N} }, -- empty pattern + } +end + +local function set_f_gsub1 (lib, flg) + local subj, pat = "abcdef", "[abef]+" + local cpat = lib.new(pat) + return { + Name = "Function gsub, set1", + Func = get_gsub (lib), + --{ s, p, f, n, res1, res2, res3 }, + { {subj, cpat, "", 0}, {subj, 0, 0} }, -- test "n" + empty_replace + { {subj, pat, "", 0}, {subj, 0, 0} }, -- test "n" + empty_replace + { {subj, pat, "", -1}, {subj, 0, 0} }, -- test "n" + empty_replace + { {subj, pat, "", 1}, {"cdef", 1, 1} }, + { {subj, pat, "", 2}, {"cd", 2, 2} }, + { {subj, pat, "", 3}, {"cd", 2, 2} }, + { {subj, pat, "" }, {"cd", 2, 2} }, + { {subj, pat, "#", 0}, {subj, 0, 0} }, -- test "n" + non-empty_replace + { {subj, pat, "#", 1}, {"#cdef", 1, 1} }, + { {subj, pat, "#", 2}, {"#cd#", 2, 2} }, + { {subj, pat, "#", 3}, {"#cd#", 2, 2} }, + { {subj, pat, "#" }, {"#cd#", 2, 2} }, + { {"abc", "^.", "#" }, {"#bc", 1, 1} }, -- anchored pattern + } +end + +local function set_f_gsub2 (lib, flg) + local subj, pat = "abc", "\\([ac]\\)" + return { + Name = "Function gsub, set2", + Func = get_gsub (lib), + --{ s, p, f, n, res1, res2, res3 }, + { {subj, pat, "<%1>" }, {"<a>b<c>", 2, 2} }, -- test non-escaped chars in f + { {subj, pat, "%<%1%>" }, {"<a>b<c>", 2, 2} }, -- test escaped chars in f + { {subj, pat, "" }, {"b", 
2, 2} }, -- test empty replace + { {subj, pat, "1" }, {"1b1", 2, 2} }, -- test odd and even %'s in f + { {subj, pat, "%1" }, {"abc", 2, 2} }, + { {subj, pat, "%%1" }, {"%1b%1", 2, 2} }, + { {subj, pat, "%%%1" }, {"%ab%c", 2, 2} }, + { {subj, pat, "%%%%1" }, {"%%1b%%1", 2, 2} }, + { {subj, pat, "%%%%%1" }, {"%%ab%%c", 2, 2} }, + } +end + +local function set_f_gsub3 (lib, flg) + return { + Name = "Function gsub, set3", + Func = get_gsub (lib), + --{ s, p, f, n, res1,res2,res3 }, + { {"abc", "a", "%0" }, {"abc", 1, 1} }, -- test (in)valid capture index + { {"abc", "a", "%1" }, {"abc", 1, 1} }, + { {"abc", "[ac]", "%1" }, {"abc", 2, 2} }, + { {"abc", "\\(a\\)", "%1" }, {"abc", 1, 1} }, + { {"abc", "\\(a\\)", "%2" }, "invalid capture index" }, + } +end + +local function set_f_gsub4 (lib, flg) + return { + Name = "Function gsub, set4", + Func = get_gsub (lib), + --{ s, p, f, n, res1, res2, res3 }, + { {"a2c3", ".", "#" }, {"####", 4, 4} }, -- test . + { {"a2c3", ".+", "#" }, {"#", 1, 1} }, -- test .+ + { {"a2c3", ".*", "#" }, {"##", 2, 2} }, -- test .* + { {"/* */ */", "\\/\\*\\(.*\\)\\*\\/", "#" }, {"#", 1, 1} }, + { {"a2c3", "[0-9]", "#" }, {"a#c#", 2, 2} }, -- test %d + { {"a2c3", "[^0-9]", "#" }, {"#2#3", 2, 2} }, -- test %D + { {"a \t\nb", "[ \t\n]", "#" }, {"a###b", 3, 3} }, -- test %s + { {"a \t\nb", "[^ \t\n]", "#" }, {"# \t\n#", 2, 2} }, -- test %S + } +end + +local function set_f_gsub5 (lib, flg) + local function frep1 () end -- returns nothing + local function frep2 () return "#" end -- ignores arguments + local function frep3 (...) 
return table.concat({...}, ",") end -- "normal" + local function frep4 () return {} end -- invalid return type + local function frep5 () return "7", "a" end -- 2-nd return is "a" + local function frep6 () return "7", "break" end -- 2-nd return is "break" + local subj = "a2c3" + return { + Name = "Function gsub, set5", + Func = get_gsub (lib), + --{ s, p, f, n, res1, res2, res3 }, + { {subj, "a\\(.\\)c\\(.\\)", frep1 }, {subj, 1, 0} }, + { {subj, "a\\(.\\)c\\(.\\)", frep2 }, {"#", 1, 1} }, + { {subj, "a\\(.\\)c\\(.\\)", frep3 }, {"2,3", 1, 1} }, + { {subj, "a.c.", frep3 }, {subj, 1, 1} }, + { {subj, "z*", frep1 }, {subj, 5, 0} }, + { {subj, "z*", frep2 }, {"#a#2#c#3#", 5, 5} }, + { {subj, "z*", frep3 }, {subj, 5, 5} }, + { {subj, subj, frep4 }, "invalid return type" }, + { {"abc",".", frep5 }, {"777", 3, 3} }, + { {"abc",".", frep6 }, {"777", 3, 3} }, + } +end + +local function set_f_gsub6 (lib, flg) + local tab1, tab2, tab3 = {}, { ["2"] = 56 }, { ["2"] = {} } + local subj = "a2c3" + return { + Name = "Function gsub, set6", + Func = get_gsub (lib), + --{ s, p, f, n, res1,res2,res3 }, + { {subj, "a\\(.\\)c\\(.\\)", tab1 }, {subj, 1, 0} }, + { {subj, "a\\(.\\)c\\(.\\)", tab2 }, {"56", 1, 1} }, + { {subj, "a\\(.\\)c\\(.\\)", tab3 }, "invalid replacement type" }, + { {subj, "a.c.", tab1 }, {subj, 1, 0} }, + { {subj, "a.c.", tab2 }, {subj, 1, 0} }, + { {subj, "a.c.", tab3 }, {subj, 1, 0} }, + } +end + +local function set_f_gsub8 (lib, flg) + local subj, patt, repl = "abcdef", "..", "*" + return { + Name = "Function gsub, set8", + Func = get_gsub (lib), + --{ s, p, f, n, res1, res2, res3 }, + { {subj, patt, repl, function() end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return nil end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return false end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return true end }, {"***", 3, 3} }, + { {subj, patt, repl, function() return {} end }, {"***", 3, 3} }, + { {subj, patt, repl, function() return 
"#" end }, {"###", 3, 3} }, + { {subj, patt, repl, function() return 57 end }, {"575757", 3, 3} }, + { {subj, patt, repl, function (from) return from end }, {"135", 3, 3} }, + { {subj, patt, repl, function (from, to) return to end }, {"246", 3, 3} }, + { {subj, patt, repl, function (from,to,rep) return rep end }, + {"***", 3, 3} }, + { {subj, patt, repl, function (from, to, rep) return rep..to..from end }, + {"*21*43*65", 3, 3} }, + { {subj, patt, repl, function() return nil end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return nil, nil end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return nil, false end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return nil, true end }, {"ab**", 3, 2} }, + { {subj, patt, repl, function() return true, true end }, {"***", 3, 3} }, + { {subj, patt, repl, function() return nil, 0 end }, {"abcdef", 1, 0} }, + { {subj, patt, repl, function() return true, 0 end }, {"*cdef", 1, 1} }, + { {subj, patt, repl, function() return nil, 1 end }, {"ab*ef", 2, 1} }, + { {subj, patt, repl, function() return true, 1 end }, {"**ef", 2, 2} }, + } +end + +return function (libname) + local lib = require (libname) + return { + set_f_gmatch (lib), + set_f_split (lib), + set_f_find (lib), + set_f_match (lib), + set_m_exec (lib), + set_m_tfind (lib), + set_m_find (lib), + set_m_match (lib), + set_f_gsub1 (lib), + set_f_gsub2 (lib), + set_f_gsub3 (lib), + set_f_gsub4 (lib), + set_f_gsub5 (lib), + set_f_gsub6 (lib), + set_f_gsub8 (lib), + set_f_plainfind (lib), + } +end diff --git a/test/runtest.lua b/test/runtest.lua index c4f60e7..7c1eef4 100755 --- a/test/runtest.lua +++ b/test/runtest.lua @@ -37,7 +37,7 @@ local function test_library (libname, setfile, verbose) end local avail_tests = { - gnuregex = { lib = "rex_gnuregex", "common_sets", "posix_sets" }, + gnu = { lib = "rex_gnu", "emacs_sets", }, posix = { lib = "rex_posix", "common_sets", "posix_sets", }, spencer = { lib = "rex_spencer", "common_sets", 
"posix_sets", "spencer_sets" }, posix1 = { lib = "rex_posix1", "common_sets", "posix_sets", "spencer_sets" }, |