diff options
-rw-r--r-- | src/algo.h | 16 | ||||
-rw-r--r-- | src/tre/Makefile | 3 | ||||
-rw-r--r-- | src/tre/ltre.c | 3 | ||||
-rw-r--r-- | src/tre/ltre_w.c | 219 | ||||
-rw-r--r-- | test/luatest.lua | 49 | ||||
-rw-r--r-- | test/runtest.lua | 2 | ||||
-rw-r--r-- | test/tre_sets.lua | 347 | ||||
-rw-r--r-- | windows/mingw/_mingw.mak | 2 | ||||
-rw-r--r-- | windows/mingw/rex_tre.mak | 3 |
9 files changed, 611 insertions, 33 deletions
@@ -13,6 +13,10 @@ static int split_exec (TUserdata *ud, TArgExec *argE, int offset); static int compile_regex (lua_State *L, const TArgComp *argC, TUserdata **pud); static int generate_error (lua_State *L, const TUserdata *ud, int errcode); +#ifndef ALG_CHARSIZE +# define ALG_CHARSIZE 1 +#endif + #ifndef ALG_GETCARGS # define ALG_GETCARGS(a,b,c) #endif @@ -72,11 +76,11 @@ static int get_startoffset(lua_State *L, int stackpos, size_t len) { if(startoffset > 0) startoffset--; else if(startoffset < 0) { - startoffset += len; + startoffset += len/ALG_CHARSIZE; if(startoffset < 0) startoffset = 0; } - return startoffset; + return startoffset*ALG_CHARSIZE; } @@ -447,10 +451,10 @@ static int gmatch_iter (lua_State *L) { #ifdef ALG_USERETRY SET_RETRY (retry, 1); #else - incr = 1; + incr = ALG_CHARSIZE; #endif } - ALG_PUSHEND (L, ud, ALG_BASE(argE.startoffset)+incr, 0); /* update start offset */ + lua_pushinteger(L, ALG_BASE(argE.startoffset) + incr + ALG_SUBEND(ud,0)); /* update start offset */ lua_replace (L, lua_upvalueindex (4)); #ifdef ALG_USERETRY lua_pushinteger (L, retry); @@ -501,9 +505,9 @@ static int split_iter (lua_State *L) { res = split_exec (ud, &argE, newoffset); if (ALG_ISMATCH (res)) { - ALG_PUSHEND (L, ud, ALG_BASE(newoffset), 0); /* update start offset */ + lua_pushinteger(L, ALG_BASE(newoffset) + ALG_SUBEND(ud,0)); /* update start offset */ lua_replace (L, lua_upvalueindex (4)); - lua_pushinteger (L, ALG_SUBLEN(ud,0) ? 0 : 1); /* update incr */ + lua_pushinteger (L, ALG_SUBLEN(ud,0) ? 0 : ALG_CHARSIZE); /* update incr */ lua_replace (L, lua_upvalueindex (5)); /* push text preceding the match */ lua_pushlstring (L, argE.text + argE.startoffset, diff --git a/src/tre/Makefile b/src/tre/Makefile index 79458a4..2d3964e 100644 --- a/src/tre/Makefile +++ b/src/tre/Makefile @@ -25,7 +25,7 @@ REGNAME = tre # =========================================================================== # === END OF USER SETTINGS === -OBJ = ltre.o ../common.o +OBJ = ltre.o ltre_w.o ../common.o include ../common.mak @@ -37,6 +37,7 @@ so_tre: $(TRG_SO) # Dependencies ltre.o: ltre.c ../common.h ../algo.h +ltre_w.o: ltre_w.c ../common.h ../algo.h ../common.o: ../common.c ../common.h # (End of Makefile) diff --git a/src/tre/ltre.c b/src/tre/ltre.c index 259646b..c612183 100644 --- a/src/tre/ltre.c +++ b/src/tre/ltre.c @@ -7,6 +7,7 @@ #include "lua.h" #include "lauxlib.h" #include "../common.h" +extern void add_wide_lib (lua_State *L, int methods); #include <tre/tre.h> @@ -347,9 +348,11 @@ REX_API int REX_OPENLIB (lua_State *L) lua_pushvalue(L, -1); /* mt.__index = mt */ lua_setfield(L, -2, "__index"); luaL_register (L, NULL, posixmeta); + add_wide_lib (L, 1); /* register functions */ luaL_register (L, REX_LIBNAME, rexlib); + add_wide_lib (L, 0); lua_pushliteral (L, REX_VERSION" (for TRE regexes)"); lua_setfield (L, -2, "_VERSION"); return 1; diff --git a/src/tre/ltre_w.c b/src/tre/ltre_w.c new file mode 100644 index 0000000..bd3ca1e --- /dev/null +++ b/src/tre/ltre_w.c @@ -0,0 +1,219 @@ +/* ltre.c - Lua binding of TRE regular expressions library */ +/* See Copyright Notice in the file LICENSE */ + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include "lua.h" +#include "lauxlib.h" +#include "../common.h" + +#include <tre/tre.h> + +/* These 2 settings may be redefined from the command-line or the makefile. + * They should be kept in sync between themselves and with the target name. + */ +#ifndef REX_LIBNAME +# define REX_LIBNAME "rex_tre" +#endif +#ifndef REX_OPENLIB +# define REX_OPENLIB luaopen_rex_tre +#endif + +#define REX_TYPENAME REX_LIBNAME"_regex" + +#define ALG_CFLAGS_DFLT REG_EXTENDED +#define ALG_EFLAGS_DFLT 0 +#define ALG_CHARSIZE 2 + +#define ALG_NOMATCH(res) ((res) == REG_NOMATCH) +#define ALG_ISMATCH(res) ((res) == 0) +#define ALG_SUBBEG(ud,n) (ALG_CHARSIZE * ud->match[n].rm_so) +#define ALG_SUBEND(ud,n) (ALG_CHARSIZE * ud->match[n].rm_eo) +#define ALG_SUBLEN(ud,n) (ALG_SUBEND(ud,n) - ALG_SUBBEG(ud,n)) +#define ALG_SUBVALID(ud,n) (ALG_SUBBEG(ud,n) >= 0) +#define ALG_NSUB(ud) ((int)ud->r.re_nsub) + +#define ALG_PUSHSUB(L,ud,text,n) \ + lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n)) + +#define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \ + (ALG_SUBVALID(ud,n) ? ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) + +#define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, ((offs) + ALG_SUBBEG(ud,n))/ALG_CHARSIZE + 1) +#define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, ((offs) + ALG_SUBEND(ud,n))/ALG_CHARSIZE) +#define ALG_PUSHOFFSETS(L,ud,offs,n) \ + (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n)) + +#define ALG_BASE(st) (st) +#define ALG_GETCFLAGS(L,pos) luaL_optint(L, pos, ALG_CFLAGS_DFLT) + +typedef struct { + regex_t r; + regmatch_t * match; + int freed; +} TPosix; + +#define TUserdata TPosix + +#include "../algo.h" + +/* Functions + ****************************************************************************** + */ + +static void checkarg_regaparams (lua_State *L, int stackpos, regaparams_t *argP) { + if (lua_type (L, stackpos) != LUA_TTABLE) /* allow for userdata? */ + luaL_argerror (L, stackpos, "table expected"); + lua_pushvalue (L, stackpos); + argP->cost_ins = get_int_field (L, "cost_ins"); + argP->cost_del = get_int_field (L, "cost_del"); + argP->cost_subst = get_int_field (L, "cost_subst"); + argP->max_cost = get_int_field (L, "max_cost"); + argP->max_ins = get_int_field (L, "max_ins"); + argP->max_del = get_int_field (L, "max_del"); + argP->max_subst = get_int_field (L, "max_subst"); + argP->max_err = get_int_field (L, "max_err"); + lua_pop (L, 1); +} + +/* method r:atfind (s, params, [st], [ef]) */ +/* method r:aexec (s, params, [st], [ef]) */ +static void checkarg_atfind (lua_State *L, TArgExec *argE, TPosix **ud, + regaparams_t *argP) { + *ud = check_ud (L); + argE->text = luaL_checklstring (L, 2, &argE->textlen); + checkarg_regaparams (L, 3, argP); + argE->startoffset = get_startoffset (L, 4, argE->textlen); + argE->eflags = luaL_optint (L, 5, ALG_EFLAGS_DFLT); +} + +static int generate_error (lua_State *L, const TPosix *ud, int errcode) { + char errbuf[80]; + tre_regerror (errcode, &ud->r, errbuf, sizeof (errbuf)); + return luaL_error (L, "%s", errbuf); +} + +static int compile_regex (lua_State *L, const TArgComp *argC, TPosix **pud) { + int res; + TPosix *ud; + + ud = (TPosix *)lua_newuserdata (L, sizeof (TPosix)); + memset (ud, 0, sizeof (TPosix)); /* initialize all members to 0 */ + + res = tre_regwncomp (&ud->r, (const wchar_t*)argC->pattern, argC->patlen/ALG_CHARSIZE, argC->cflags); + if (res != 0) + return generate_error (L, ud, res); + + if (argC->cflags & REG_NOSUB) + ud->r.re_nsub = 0; + ud->match = (regmatch_t *) Lmalloc (L, (ALG_NSUB(ud) + 1) * sizeof (regmatch_t)); + lua_pushvalue (L, LUA_ENVIRONINDEX); + lua_setmetatable (L, -2); + + if (pud) *pud = ud; + return 1; +} + +static int generic_atfind (lua_State *L, int tfind) { + int res; + TArgExec argE; + TPosix *ud; + regaparams_t argP; + regamatch_t res_match; + + checkarg_atfind (L, &argE, &ud, &argP); + if (argE.startoffset > (int)argE.textlen) + return lua_pushnil(L), 1; + + argE.text += argE.startoffset; + res_match.nmatch = ALG_NSUB(ud) + 1; + res_match.pmatch = ud->match; + + /* execute the search */ + res = tre_reganexec (&ud->r, argE.text, argE.textlen - argE.startoffset, + &res_match, argP, argE.eflags); + if (ALG_ISMATCH (res)) { + ALG_PUSHOFFSETS (L, ud, argE.startoffset, 0); + if (tfind) + push_substring_table (L, ud, argE.text); + else + push_offset_table (L, ud, argE.startoffset); + /* set values in the dictionary part of the table */ + set_int_field (L, "cost", res_match.cost); + set_int_field (L, "num_ins", res_match.num_ins); + set_int_field (L, "num_del", res_match.num_del); + set_int_field (L, "num_subst", res_match.num_subst); + return 3; + } + else if (ALG_NOMATCH (res)) + return lua_pushnil (L), 1; + else + return generate_error (L, ud, res); +} + +static int Ltre_atfind (lua_State *L) { + return generic_atfind (L, 1); +} + +static int Ltre_aexec (lua_State *L) { + return generic_atfind (L, 0); +} + +static int gmatch_exec (TUserdata *ud, TArgExec *argE) { + if (argE->startoffset > 0) + argE->eflags |= REG_NOTBOL; + argE->text += argE->startoffset; + return tre_regwnexec (&ud->r, (const wchar_t*)argE->text, (argE->textlen - argE->startoffset)/ALG_CHARSIZE, + ALG_NSUB(ud) + 1, ud->match, argE->eflags); +} + +static void gmatch_pushsubject (lua_State *L, TArgExec *argE) { + lua_pushlstring (L, argE->text, argE->textlen); +} + +static int findmatch_exec (TPosix *ud, TArgExec *argE) { + argE->text += argE->startoffset; + return tre_regwnexec (&ud->r, (const wchar_t*)argE->text, (argE->textlen - argE->startoffset)/ALG_CHARSIZE, + ALG_NSUB(ud) + 1, ud->match, argE->eflags); +} + +static int gsub_exec (TPosix *ud, TArgExec *argE, int st) { + if (st > 0) + argE->eflags |= REG_NOTBOL; + return tre_regwnexec (&ud->r, (const wchar_t*)(argE->text+st), (argE->textlen-st)/ALG_CHARSIZE, ALG_NSUB(ud)+1, + ud->match, argE->eflags); +} + +static int split_exec (TPosix *ud, TArgExec *argE, int offset) { + if (offset > 0) + argE->eflags |= REG_NOTBOL; + return tre_regwnexec (&ud->r, (const wchar_t*)(argE->text + offset), (argE->textlen - offset)/ALG_CHARSIZE, + ALG_NSUB(ud) + 1, ud->match, argE->eflags); +} + +static const luaL_Reg posixmeta[] = { + { "wexec", ud_exec }, + { "wtfind", ud_tfind }, + { "wfind", ud_find }, + { "wmatch", ud_match }, + { "waexec", Ltre_aexec }, + { "watfind", Ltre_atfind }, + { NULL, NULL} +}; + +static const luaL_Reg rexlib[] = { + { "wmatch", match }, + { "wfind", find }, + { "wgmatch", gmatch }, + { "wgsub", gsub }, + { "wsplit", split }, + { "wnew", ud_new }, + { NULL, NULL } +}; + +/* Add the library */ +void add_wide_lib (lua_State *L, int methods) +{ + luaL_register(L, NULL, methods ? posixmeta : rexlib); +} diff --git a/test/luatest.lua b/test/luatest.lua index cd191ed..5ea7ba8 100644 --- a/test/luatest.lua +++ b/test/luatest.lua @@ -1,9 +1,7 @@ -- See Copyright Notice in the file LICENSE -local P = {} - -- arrays: deep comparison -function P.eq (t1, t2, lut) +local function eq (t1, t2, lut) if t1 == t2 then return true end if type(t1) ~= "table" or type(t2) ~= "table" or #t1 ~= #t2 then return false @@ -16,20 +14,20 @@ function P.eq (t1, t2, lut) lut[t1][t2], lut[t2][t1] = true, true for k,v in ipairs (t1) do - if not P.eq (t2[k], v, lut) then return false end -- recursion + if not eq (t2[k], v, lut) then return false end -- recursion end return true end -- a "nil GUID", to be used instead of nils in datasets -P.NT = "b5f74fe5-46f4-483a-8321-e58ba2fa0e17" +local NT = "b5f74fe5-46f4-483a-8321-e58ba2fa0e17" -- pack vararg in table, replacing nils with "NT" items local function packNT (...) local t = {} for i=1, select ("#", ...) do local v = select (i, ...) - t[i] = (v == nil) and P.NT or v + t[i] = (v == nil) and NT or v end return t end @@ -39,7 +37,7 @@ local function unpackNT (t) local len = #t local function unpack_from (i) local v = t[i] - if v == P.NT then v = nil end + if v == NT then v = nil end if i == len then return v end return v, unpack_from (i+1) end @@ -47,31 +45,29 @@ local function unpackNT (t) end -- print results (deep into arrays) -function P.print_results (val, indent, lut) +local function print_results (val, indent, lut) indent = indent or "" lut = lut or {} -- look-up table local str = tostring (val) if type (val) == "table" then - if val == P.NT then - print (indent .. "nil") - elseif lut[val] then - print (indent .. str) + if lut[val] then + io.write (indent, str, "\n") else lut[val] = true - print (indent .. str) + io.write (indent, str, "\n") for i,v in ipairs (val) do - P.print_results (v, " " .. indent, lut) -- recursion + print_results (v, " " .. indent, lut) -- recursion end end else - print (indent .. str) + io.write (indent, val == NT and "nil" or str, "\n") end end -- returns: -- 1) true, if success; false, if failure -- 2) test results table or error_message -function P.test_function (test, func) +local function test_function (test, func) local res local t = packNT (pcall (func, unpackNT (test[1]))) if t[1] then @@ -81,7 +77,7 @@ function P.test_function (test, func) res = t[2] --> error_message end local how = (type (res) == type (test[2])) and - (type (res) == "string" or P.eq (res, test[2])) -- allow error messages to differ + (type (res) == "string" or eq (res, test[2])) -- allow error messages to differ return how, res end @@ -89,7 +85,7 @@ end -- 1) true, if success; false, if failure -- 2) test results table or error_message -- 3) test results table or error_message -function P.test_method (test, constructor, name) +local function test_method (test, constructor, name) local res1, res2 local ok, r = pcall (constructor, unpackNT (test[1])) if ok then @@ -103,17 +99,17 @@ function P.test_method (test, constructor, name) else res1, res2 = 1, r --> 1, error_message end - return P.eq (res1, test[3]), res1, res2 + return eq (res1, test[3]), res1, res2 end -- returns: a list of failed tests -function P.test_set (set, lib) +local function test_set (set, lib) local list = {} if type (set.Func) == "function" then local func = set.Func for i,test in ipairs (set) do - local ok, res = P.test_function (test, func) + local ok, res = test_function (test, func) if not ok then table.insert (list, {i=i, res}) end @@ -121,7 +117,7 @@ function P.test_set (set, lib) elseif type (set.Method) == "string" then for i,test in ipairs (set) do - local ok, res1, res2 = P.test_method (test, lib.new, set.Method) + local ok, res1, res2 = test_method (test, lib.new, set.Method) if not ok then table.insert (list, {i=i, res1, res2}) end @@ -134,4 +130,11 @@ function P.test_set (set, lib) return list end -return P +return { + eq = eq, + NT = NT, + print_results = print_results, + test_function = test_function, + test_method = test_method, + test_set = test_set, +} diff --git a/test/runtest.lua b/test/runtest.lua index 354b50e..efb4e43 100644 --- a/test/runtest.lua +++ b/test/runtest.lua @@ -41,7 +41,7 @@ local avail_tests = { posix = { lib = "rex_posix", "common_sets", "posix_sets", }, spencer = { lib = "rex_spencer", "common_sets", "posix_sets", "spencer_sets" }, posix1 = { lib = "rex_posix1", "common_sets", "posix_sets", "spencer_sets" }, - tre = { lib = "rex_tre", "common_sets", "posix_sets", "spencer_sets" }, + tre = { lib = "rex_tre", "common_sets", "posix_sets", "spencer_sets", "tre_sets" }, lord = { lib = "rex_lord", "common_sets", "posix_sets" }, maddock = { lib = "rex_maddock", "common_sets", "posix_sets", }, pcreposix = { lib = "rex_pcreposix","common_sets", "posix_sets", }, diff --git a/test/tre_sets.lua b/test/tre_sets.lua new file mode 100644 index 0000000..f99ceab --- /dev/null +++ b/test/tre_sets.lua @@ -0,0 +1,347 @@ +-- See Copyright Notice in the file LICENSE + +-- This file should contain only test sets that behave identically +-- when being run with pcre or posix regex libraries. + +local luatest = require "luatest" +local N = luatest.NT + +local sysutils = require "sysutils" +local L = sysutils.Utf8ToUtf16 +local U = sysutils.Utf16ToUtf8 + +local function norm(a) return a==nil and N or a end + +local function get_wgsub (lib) + return lib.wgsub or + function (subj, pattern, repl, n) + return lib.wnew (pattern) : wgsub (subj, repl, n) + end +end + +local function set_f_wgmatch (lib, flg) + -- gmatch (s, p, [cf], [ef]) + local function test_wgmatch (subj, patt) + local out, guard = {}, 10 + for a, b in lib.wgmatch (subj, patt) do + table.insert (out, { norm(a), norm(b) }) + guard = guard - 1 + if guard == 0 then break end + end + return unpack (out) + end + return { + Name = "Function wgmatch", + Func = test_wgmatch, + --{ subj patt results } + { {L"ab", lib.wnew(L".")}, {{L"a",N}, {L"b",N} } }, + { {(L"abcd"):rep(3), L"(.)b.(d)"}, {{L"a",L"d"},{L"a",L"d"},{L"a",L"d"}} }, + { {L"abcd", L".*" }, {{L"abcd",N},{L"",N} } },--zero-length match + { {L"abc", L"^." }, {{L"a",N}} },--anchored pattern + } +end + +local function set_f_wsplit (lib, flg) + -- split (s, p, [cf], [ef]) + local function test_wsplit (subj, patt) + local out, guard = {}, 10 + for a, b, c in lib.wsplit (subj, patt) do + table.insert (out, { norm(a), norm(b), norm(c) }) + guard = guard - 1 + if guard == 0 then break end + end + return unpack (out) + end + return { + Name = "Function wsplit", + Func = test_wsplit, + --{ subj patt results } + { {L"ab", lib.wnew(L",")}, {{L"ab",N,N}, } }, + { {L"ab", L","}, {{L"ab",N,N}, } }, + { {L",", L","}, {{L"",L",",N}, {L"", N, N}, } }, + { {L",,", L","}, {{L"",L",",N}, {L"",L",",N}, {L"",N,N} } }, + { {L"a,b", L","}, {{L"a",L",",N}, {L"b",N,N}, } }, + { {L",a,b", L","}, {{L"",L",",N}, {L"a",L",",N}, {L"b",N,N}} }, + { {L"a,b,", L","}, {{L"a",L",",N}, {L"b",L",",N}, {L"",N,N} } }, + { {L"a,,b", L","}, {{L"a",L",",N}, {L"",L",",N}, {L"b",N,N}} }, + { {L"ab<78>c", L"<(.)(.)>"}, {{L"ab",L"7",L"8"}, {L"c",N,N}, } }, + { {L"abc", L"^."}, {{L"", L"a",N}, {L"bc",N,N}, } },--anchored pattern + { {L"abc", L"^"}, {{L"", L"", N}, {L"abc",N,N}, } }, +-- { {"abc", "$"}, {{"abc","",N}, {"",N,N}, } }, +-- { {"abc", "^|$"}, {{"", "", N}, {"abc","",N},{"",N,N},} }, + } +end + +local function set_f_wfind (lib, flg) + return { + Name = "Function wfind", + Func = lib.wfind, + -- {subj, patt, st}, { results } + { {L"abcd", lib.wnew(L".+")}, { 1,4 } }, -- [none] + { {L"abcd", L".+"}, { 1,4 } }, -- [none] + { {L"abcd", L".+", 2}, { 2,4 } }, -- positive st + { {L"abcd", L".+", -2}, { 3,4 } }, -- negative st + { {L"abcd", L".*"}, { 1,4 } }, -- [none] + { {L"abc", L"bc"}, { 2,3 } }, -- [none] + { {L"abcd", L"(.)b.(d)"}, { 1,4,L"a",L"d" }}, -- [captures] + } +end + +local function set_f_wmatch (lib, flg) + return { + Name = "Function wmatch", + Func = lib.wmatch, + -- {subj, patt, st}, { results } + { {L"abcd", lib.wnew(L".+")}, {L"abcd"} }, -- [none] + { {L"abcd", L".+"}, {L"abcd"} }, -- [none] + { {L"abcd", L".+", 2}, {L"bcd"} }, -- positive st + { {L"abcd", L".+", -2}, {L"cd"} }, -- negative st + { {L"abcd", L".*"}, {L"abcd"} }, -- [none] + { {L"abc", L"bc"}, {L"bc"} }, -- [none] + { {L"abcd", L"(.)b.(d)"}, {L"a",L"d"} }, -- [captures] + } +end + +local function set_m_wexec (lib, flg) + return { + Name = "Method wexec", + Method = "wexec", + --{patt}, {subj, st} { results } + { {L".+"}, {L"abcd"}, {1,4,{}} }, -- [none] + { {L".+"}, {L"abcd",2}, {2,4,{}} }, -- positive st + { {L".+"}, {L"abcd",-2}, {3,4,{}} }, -- negative st + { {L".*"}, {L"abcd"}, {1,4,{}} }, -- [none] + { {L"bc"}, {L"abc"}, {2,3,{}} }, -- [none] + { {L "(.)b.(d)"}, {L"abcd"}, {1,4,{1,1,4,4}}},--[captures] + { {L"(a+)6+(b+)"}, {L"Taa66bbT",2}, {2,7,{2,3,6,7}}},--[st+captures] + } +end + +local function set_m_wtfind (lib, flg) + return { + Name = "Method wtfind", + Method = "wtfind", + --{patt}, {subj, st} { results } + { {L".+"}, {L"abcd"}, {1,4,{}} }, -- [none] + { {L".+"}, {L"abcd",2}, {2,4,{}} }, -- positive st + { {L".+"}, {L"abcd",-2}, {3,4,{}} }, -- negative st + { {L".*"}, {L"abcd"}, {1,4,{}} }, -- [none] + { {L"bc"}, {L"abc"}, {2,3,{}} }, -- [none] + { {L"(.)b.(d)"}, {L"abcd"}, {1,4,{L"a",L"d"}}},--[captures] + } +end + +local function set_m_wfind (lib, flg) + return { + Name = "Method wfind", + Method = "wfind", + --{patt}, {subj, st} { results } + { {L".+"}, {L"abcd"}, {1,4} }, -- [none] + { {L".+"}, {L"abcd",2}, {2,4} }, -- positive st + { {L".+"}, {L"abcd",-2}, {3,4} }, -- negative st + { {L".*"}, {L"abcd"}, {1,4} }, -- [none] + { {L"bc"}, {L"abc"}, {2,3} }, -- [none] + { {L"(.)b.(d)"}, {L"abcd"}, {1,4,L"a",L"d"}},--[captures] + } +end + +local function set_m_wmatch (lib, flg) + return { + Name = "Method wmatch", + Method = "wmatch", + --{patt}, {subj, st} { results } + { {L".+"}, {L"abcd"}, {L"abcd"} }, -- [none] + { {L".+"}, {L"abcd",2}, {L"bcd" } }, -- positive st + { {L".+"}, {L"abcd",-2}, {L"cd" } }, -- negative st + { {L".*"}, {L"abcd"}, {L"abcd"} }, -- [none] + { {L"bc"}, {L"abc"}, {L"bc" } }, -- [none] + {{ L"(.)b.(d)"}, {L"abcd"}, {L"a",L"d"} }, --[captures] + } +end + +local function set_f_plainfind (lib, flg) + return { + Name = "Function plainfind", + Func = lib.plainfind, + --{ subj, patt, st, ci } { results } + { {"abcd", "bc"}, {2,3} }, -- [none] + { {"abcd", "dc"}, {N} }, -- [none] + { {"abcd", "cd"}, {3,4} }, -- positive st + { {"abcd", "cd", 3}, {3,4} }, -- positive st + { {"abcd", "cd", 4}, {N} }, -- failing st + { {"abcd", "bc", 2}, {2,3} }, -- positive st + { {"abcd", "bc", -4}, {2,3} }, -- negative st + { {"abcd", "bc", 3}, {N} }, -- failing st + { {"abcd", "BC"}, {N} }, -- case sensitive + { {"abcd", "BC", N, true}, {2,3} }, -- case insensitive + { {"ab\0cd", "b\0c"}, {2,4} }, -- contains nul + { {"abcd", "", 1}, {1,0} }, -- empty pattern + { {"abcd", "", 5}, {5,4} }, -- empty pattern + { {"abcd", "", 6}, {N} }, -- empty pattern + } +end + +local function set_f_wgsub1 (lib, flg) + local subj, pat = L"abcdef", L"[abef]+" + local cpat = lib.wnew(pat) + return { + Name = "Function wgsub, set1", + Func = get_wgsub (lib), + --{ s, p, f, n, res1, res2, res3 }, + { {subj, cpat, L"", 0}, {subj, 0, 0} }, -- test "n" + empty_replace + { {subj, pat, L"", 0}, {subj, 0, 0} }, -- test "n" + empty_replace + { {subj, pat, L"", -1}, {subj, 0, 0} }, -- test "n" + empty_replace + { {subj, pat, L"", 1}, {L"cdef", 1, 1} }, + { {subj, pat, L"", 2}, {L"cd", 2, 2} }, + { {subj, pat, L"", 3}, {L"cd", 2, 2} }, + { {subj, pat, L"" }, {L"cd", 2, 2} }, + { {subj, pat, L"#", 0}, {subj, 0, 0} }, -- test "n" + non-empty_replace + { {subj, pat, L"#", 1}, {L"#cdef", 1, 1} }, + { {subj, pat, L"#", 2}, {L"#cd#", 2, 2} }, + { {subj, pat, L"#", 3}, {L"#cd#", 2, 2} }, + { {subj, pat, L"#" }, {L"#cd#", 2, 2} }, + { {L"abc", L"^.",L"#" }, {L"#bc", 1, 1} }, -- anchored pattern + } +end + +local function set_f_wgsub2 (lib, flg) + local subj, pat = L"abc", L"([ac])" + return { + Name = "Function wgsub, set2", + Func = get_wgsub (lib), + --{ s, p, f, n, res1, res2, res3 }, + { {subj, pat, L"<%1>" }, {L"<a>b<c>", 2, 2} }, -- test non-escaped chars in f + { {subj, pat, L"%<%1%>" }, {L"<a>b<c>", 2, 2} }, -- test escaped chars in f + { {subj, pat, L"" }, {L"b", 2, 2} }, -- test empty replace + { {subj, pat, L"1" }, {L"1b1", 2, 2} }, -- test odd and even %'s in f + { {subj, pat, L"%1" }, {L"abc", 2, 2} }, + { {subj, pat, L"%%1" }, {L"%1b%1", 2, 2} }, + { {subj, pat, L"%%%1" }, {L"%ab%c", 2, 2} }, + { {subj, pat, L"%%%%1" }, {L"%%1b%%1", 2, 2} }, + { {subj, pat, L"%%%%%1" }, {L"%%ab%%c", 2, 2} }, + } +end + +local function set_f_wgsub3 (lib, flg) + return { + Name = "Function wgsub, set3", + Func = get_wgsub (lib), + --{ s, p, f, n, res1,res2,res3 }, + { {L"abc", L"a", "%0" }, {"abc", 1, 1} }, -- test (in)valid capture index + { {L"abc", L"a", "%1" }, {"abc", 1, 1} }, + { {L"abc", L"[ac]", "%1" }, {"abc", 2, 2} }, + { {L"abc", L"(a)", "%1" }, {"abc", 1, 1} }, + { {L"abc", L"(a)", "%2" }, "invalid capture index" }, + } +end + +local function set_f_wgsub4 (lib, flg) + return { + Name = "Function wgsub, set4", + Func = get_wgsub (lib), + --{ s, p, f, n, res1, res2, res3 }, + { {"a2c3", ".", "#" }, {"####", 4, 4} }, -- test . + { {"a2c3", ".+", "#" }, {"#", 1, 1} }, -- test .+ + { {"a2c3", ".*", "#" }, {"##", 2, 2} }, -- test .* + { {"/* */ */", "\\/\\*(.*)\\*\\/", "#" }, {"#", 1, 1} }, + { {"a2c3", "[0-9]", "#" }, {"a#c#", 2, 2} }, -- test %d + { {"a2c3", "[^0-9]", "#" }, {"#2#3", 2, 2} }, -- test %D + { {"a \t\nb", "[ \t\n]", "#" }, {"a###b", 3, 3} }, -- test %s + { {"a \t\nb", "[^ \t\n]", "#" }, {"# \t\n#", 2, 2} }, -- test %S + } +end + +local function set_f_wgsub5 (lib, flg) + local function frep1 () end -- returns nothing + local function frep2 () return "#" end -- ignores arguments + local function frep3 (...) return table.concat({...}, ",") end -- "normal" + local function frep4 () return {} end -- invalid return type + local function frep5 () return "7", "a" end -- 2-nd return is "a" + local function frep6 () return "7", "break" end -- 2-nd return is "break" + local subj = "a2c3" + return { + Name = "Function wgsub, set5", + Func = get_wgsub (lib), + --{ s, p, f, n, res1, res2, res3 }, + { {subj, "a(.)c(.)", frep1 }, {subj, 1, 0} }, + { {subj, "a(.)c(.)", frep2 }, {"#", 1, 1} }, + { {subj, "a(.)c(.)", frep3 }, {"2,3", 1, 1} }, + { {subj, "a.c.", frep3 }, {subj, 1, 1} }, + { {subj, "z*", frep1 }, {subj, 5, 0} }, + { {subj, "z*", frep2 }, {"#a#2#c#3#", 5, 5} }, + { {subj, "z*", frep3 }, {subj, 5, 5} }, + { {subj, subj, frep4 }, "invalid return type" }, + { {"abc",".", frep5 }, {"777", 3, 3} }, + { {"abc",".", frep6 }, {"777", 3, 3} }, + } +end + +local function set_f_wgsub6 (lib, flg) + local tab1, tab2, tab3 = {}, { ["2"] = 56 }, { ["2"] = {} } + local subj = "a2c3" + return { + Name = "Function wgsub, set6", + Func = get_wgsub (lib), + --{ s, p, f, n, res1,res2,res3 }, + { {subj, "a(.)c(.)", tab1 }, {subj, 1, 0} }, + { {subj, "a(.)c(.)", tab2 }, {"56", 1, 1} }, + { {subj, "a(.)c(.)", tab3 }, "invalid replacement type" }, + { {subj, "a.c.", tab1 }, {subj, 1, 0} }, + { {subj, "a.c.", tab2 }, {subj, 1, 0} }, + { {subj, "a.c.", tab3 }, {subj, 1, 0} }, + } +end + +local function set_f_wgsub8 (lib, flg) + local subj, patt, repl = "abcdef", "..", "*" + return { + Name = "Function wgsub, set8", + Func = get_wgsub (lib), + --{ s, p, f, n, res1, res2, res3 }, + { {subj, patt, repl, function() end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return nil end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return false end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return true end }, {"***", 3, 3} }, + { {subj, patt, repl, function() return {} end }, {"***", 3, 3} }, + { {subj, patt, repl, function() return "#" end }, {"###", 3, 3} }, + { {subj, patt, repl, function() return 57 end }, {"575757", 3, 3} }, + { {subj, patt, repl, function (from) return from end }, {"135", 3, 3} }, + { {subj, patt, repl, function (from, to) return to end }, {"246", 3, 3} }, + { {subj, patt, repl, function (from,to,rep) return rep end }, + {"***", 3, 3} }, + { {subj, patt, repl, function (from, to, rep) return rep..to..from end }, + {"*21*43*65", 3, 3} }, + { {subj, patt, repl, function() return nil end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return nil, nil end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return nil, false end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return nil, true end }, {"ab**", 3, 2} }, + { {subj, patt, repl, function() return true, true end }, {"***", 3, 3} }, + { {subj, patt, repl, function() return nil, 0 end }, {"abcdef", 1, 0} }, + { {subj, patt, repl, function() return true, 0 end }, {"*cdef", 1, 1} }, + { {subj, patt, repl, function() return nil, 1 end }, {"ab*ef", 2, 1} }, + { {subj, patt, repl, function() return true, 1 end }, {"**ef", 2, 2} }, + } +end + +return function (libname) + local lib = require (libname) + lib.new = lib.wnew + return { + set_f_wgmatch (lib), + set_f_wsplit (lib), + set_f_wfind (lib), + set_f_wmatch (lib), + set_m_wexec (lib), + set_m_wtfind (lib), + set_m_wfind (lib), + set_m_wmatch (lib), + set_f_wgsub1 (lib), + --set_f_wgsub2 (lib), + --set_f_wgsub3 (lib), + --set_f_wgsub4 (lib), + --set_f_wgsub5 (lib), + --set_f_wgsub6 (lib), + --set_f_wgsub8 (lib), + --set_f_plainfind (lib), + } +end diff --git a/windows/mingw/_mingw.mak b/windows/mingw/_mingw.mak index b8d7c81..c7d9ba3 100644 --- a/windows/mingw/_mingw.mak +++ b/windows/mingw/_mingw.mak @@ -5,7 +5,7 @@ LUAINC = s:\progr\work\system\include # path of Lua DLL and regexp library DLL -DLLPATH = c:\exe +DLLPATH = c:\exe32 # name of Lua DLL to link to (.dll should be omitted) LUADLL = lua5.1 diff --git a/windows/mingw/rex_tre.mak b/windows/mingw/rex_tre.mak index 70a1f36..c1c31ea 100644 --- a/windows/mingw/rex_tre.mak +++ b/windows/mingw/rex_tre.mak @@ -8,7 +8,7 @@ REGEXINC = s:\progr\work\system\include PROJECT = rex_tre MYINCS = -I$(REGEXINC) -I$(LUAINC) MYLIBS = -L$(DLLPATH) -ltre -l$(LUADLL) -OBJ = ltre.o common.o +OBJ = ltre.o ltre_w.o common.o MYCFLAGS = -W -Wall -O2 EXPORTED = 'luaopen_$(PROJECT)' SRCPATH = ..\..\src;..\..\src\tre @@ -18,4 +18,5 @@ TESTNAME = tre include _mingw.mak ltre.o : common.h algo.h +ltre_w.o : common.h algo.h common.o : common.h |