summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Shmuel Zeigerman <solomuz0@gmail.com> 2015-02-26 19:52:07 +0200
committer Shmuel Zeigerman <solomuz0@gmail.com> 2015-02-26 19:52:07 +0200
commit 0684b19e44ea5e937791a051133d31eb06c27292 (patch)
tree 55a30cf02d9255b555505b7374b95423d696c68b
parent a5c2a17019a42cccc8a5163bb8dc57af6f17f61d (diff)
download lrexlib-0684b19e44ea5e937791a051133d31eb06c27292.tar.gz
In the functions searching for multiple matches every empty match adjacent to the previous match is discarded.
-rw-r--r-- doc/manual.txt 4
-rw-r--r-- src/algo.h 136
-rw-r--r-- test/common_sets.lua 6
-rw-r--r-- test/oniguruma_sets.lua 29
-rw-r--r-- test/pcre_sets.lua 29
-rw-r--r-- windows/mingw/_mingw.mak 28
6 files changed, 140 insertions, 92 deletions
diff --git a/doc/manual.txt b/doc/manual.txt
index 5fec66a..9a49bc1 100644
--- a/doc/manual.txt
+++ b/doc/manual.txt
@@ -88,6 +88,10 @@ Notes
9. The notation *larg...* is used to indicate optional library-specific
arguments, which are documented in the ``new`` method of each library.
+10. In the functions searching for multiple matches (``gmatch``, ``gsub``,
+ ``split``, ``count``) every empty match adjacent to the previous match
+ is discarded, e.g. ``rex.count("abc",".*")`` will return 1.
+
------------------------------------------------------------
Functions and methods common to all bindings
diff --git a/src/algo.h b/src/algo.h
index d14e92b..9a4b91c 100644
--- a/src/algo.h
+++ b/src/algo.h
@@ -228,7 +228,7 @@ static int algf_gsub (lua_State *L) {
TUserdata *ud;
TArgComp argC;
TArgExec argE;
- int n_match = 0, n_subst = 0, st = 0;
+ int n_match = 0, n_subst = 0, st = 0, last_to = -1;
TBuffer BufOut, BufRep, BufTemp, *pBuf = &BufOut;
TFreeList freelist;
/*------------------------------------------------------------------*/
@@ -262,9 +262,18 @@ static int algf_gsub (lua_State *L) {
freelist_free (&freelist);
return generate_error (L, ud, res);
}
- ++n_match;
from = ALG_BASE(st) + ALG_SUBBEG(ud,0);
to = ALG_BASE(st) + ALG_SUBEND(ud,0);
+ if (to == last_to) { /* discard an empty match adjacent to the previous match */
+ if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */
+ buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE);
+ st += ALG_CHARSIZE;
+ continue;
+ }
+ break;
+ }
+ last_to = to;
+ ++n_match;
if (st < from) {
buffer_addlstring (&BufOut, argE.text + st, from - st);
#ifdef ALG_PULL
@@ -392,7 +401,7 @@ static int algf_count (lua_State *L) {
TUserdata *ud;
TArgComp argC;
TArgExec argE;
- int n_match = 0, st = 0;
+ int n_match = 0, st = 0, last_to = -1;
/*------------------------------------------------------------------*/
checkarg_count (L, &argC, &argE);
if (argC.ud) {
@@ -410,8 +419,16 @@ static int algf_count (lua_State *L) {
else if (!ALG_ISMATCH (res)) {
return generate_error (L, ud, res);
}
- ++n_match;
to = ALG_BASE(st) + ALG_SUBEND(ud,0);
+ if (to == last_to) { /* discard an empty match adjacent to the previous match */
+ if (st < (int)argE.textlen) { /* advance by 1 char */
+ st += ALG_CHARSIZE;
+ continue;
+ }
+ break;
+ }
+ last_to = to;
+ ++n_match;
#ifdef ALG_PULL
{
int from = ALG_BASE(st) + ALG_SUBBEG(ud,0);
@@ -487,24 +504,32 @@ static int algf_match (lua_State *L) {
static int gmatch_iter (lua_State *L) {
+ int last_end, res;
TArgExec argE;
TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1));
argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen);
argE.eflags = lua_tointeger (L, lua_upvalueindex (3));
argE.startoffset = lua_tointeger (L, lua_upvalueindex (4));
-
- if (argE.startoffset > (int)argE.textlen)
- return 0;
+ last_end = lua_tointeger (L, lua_upvalueindex (5));
while (1) {
- int res = gmatch_exec (ud, &argE);
+ if (argE.startoffset > (int)argE.textlen)
+ return 0;
+ res = gmatch_exec (ud, &argE);
if (ALG_ISMATCH (res)) {
int incr = 0;
if (!ALG_SUBLEN(ud,0)) { /* no progress: prevent endless loop */
+ if (last_end == ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0)) {
+ argE.startoffset += ALG_CHARSIZE;
+ continue;
+ }
incr = ALG_CHARSIZE;
}
- lua_pushinteger(L, ALG_BASE(argE.startoffset) + incr + ALG_SUBEND(ud,0)); /* update start offset */
+ last_end = ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0);
+ lua_pushinteger(L, last_end + incr); /* update start offset */
lua_replace (L, lua_upvalueindex (4));
+ lua_pushinteger(L, last_end); /* update last end of match */
+ lua_replace (L, lua_upvalueindex (5));
/* push either captures or entire match */
if (ALG_NSUB(ud)) {
push_substrings (L, ud, argE.text, NULL);
@@ -515,9 +540,8 @@ static int gmatch_iter (lua_State *L) {
return 1;
}
}
- else if (ALG_NOMATCH (res)) {
+ else if (ALG_NOMATCH (res))
return 0;
- }
else
return generate_error (L, ud, res);
}
@@ -525,47 +549,55 @@ static int gmatch_iter (lua_State *L) {
static int split_iter (lua_State *L) {
- int incr, newoffset, res;
+ int incr, last_end, newoffset, res;
TArgExec argE;
TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1));
argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen);
argE.eflags = lua_tointeger (L, lua_upvalueindex (3));
argE.startoffset = lua_tointeger (L, lua_upvalueindex (4));
incr = lua_tointeger (L, lua_upvalueindex (5));
+ last_end = lua_tointeger (L, lua_upvalueindex (6));
- if (argE.startoffset > (int)argE.textlen)
+ if (incr < 0)
return 0;
- if ((newoffset = argE.startoffset + incr) > (int)argE.textlen)
- goto nomatch;
-
- res = split_exec (ud, &argE, newoffset);
- if (ALG_ISMATCH (res)) {
- lua_pushinteger(L, ALG_BASE(newoffset) + ALG_SUBEND(ud,0)); /* update start offset */
- lua_replace (L, lua_upvalueindex (4));
- lua_pushinteger (L, ALG_SUBLEN(ud,0) ? 0 : ALG_CHARSIZE); /* update incr */
- lua_replace (L, lua_upvalueindex (5));
- /* push text preceding the match */
- lua_pushlstring (L, argE.text + argE.startoffset,
- ALG_SUBBEG(ud,0) + ALG_BASE(newoffset) - argE.startoffset);
- /* push either captures or entire match */
- if (ALG_NSUB(ud)) {
- push_substrings (L, ud, argE.text + ALG_BASE(newoffset), NULL);
- return 1 + ALG_NSUB(ud);
- }
- else {
- ALG_PUSHSUB (L, ud, argE.text + ALG_BASE(newoffset), 0);
- return 2;
+ while (1) {
+ if ((newoffset = argE.startoffset + incr) > (int)argE.textlen)
+ break;
+ res = split_exec (ud, &argE, newoffset);
+ if (ALG_ISMATCH (res)) {
+ if (!ALG_SUBLEN(ud,0)) { /* no progress: prevent endless loop */
+ if (last_end == ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0)) {
+ incr += ALG_CHARSIZE;
+ continue;
+ }
+ }
+ lua_pushinteger(L, ALG_BASE(newoffset) + ALG_SUBEND(ud,0)); /* update start offset and last_end */
+ lua_pushvalue (L, -1);
+ lua_replace (L, lua_upvalueindex (4));
+ lua_replace (L, lua_upvalueindex (6));
+ lua_pushinteger (L, ALG_SUBLEN(ud,0) ? 0 : ALG_CHARSIZE); /* update incr */
+ lua_replace (L, lua_upvalueindex (5));
+ /* push text preceding the match */
+ lua_pushlstring (L, argE.text + argE.startoffset,
+ ALG_SUBBEG(ud,0) + ALG_BASE(newoffset) - argE.startoffset);
+ /* push either captures or entire match */
+ if (ALG_NSUB(ud)) {
+ push_substrings (L, ud, argE.text + ALG_BASE(newoffset), NULL);
+ return 1 + ALG_NSUB(ud);
+ }
+ else {
+ ALG_PUSHSUB (L, ud, argE.text + ALG_BASE(newoffset), 0);
+ return 2;
+ }
}
+ else if (ALG_NOMATCH (res))
+ break;
+ else
+ return generate_error (L, ud, res);
}
- else if (ALG_NOMATCH (res))
- goto nomatch;
- else
- return generate_error (L, ud, res);
-
-nomatch:
- lua_pushinteger (L, argE.textlen + 1); /* mark as last iteration */
- lua_replace (L, lua_upvalueindex (4)); /* update start offset */
+ lua_pushinteger (L, -1); /* mark as last iteration */
+ lua_replace (L, lua_upvalueindex (5)); /* incr = -1 */
lua_pushlstring (L, argE.text+argE.startoffset, argE.textlen-argE.startoffset);
return 1;
}
@@ -575,17 +607,16 @@ static int algf_gmatch (lua_State *L)
{
TArgComp argC;
TArgExec argE;
- TUserdata *ud;
checkarg_gmatch_split (L, &argC, &argE);
- if (argC.ud) {
- ud = (TUserdata*) argC.ud;
+ if (argC.ud)
lua_pushvalue (L, 2);
- }
- else compile_regex (L, &argC, &ud); /* 1-st upvalue: ud */
+ else
+ compile_regex (L, &argC, NULL); /* 1-st upvalue: ud */
gmatch_pushsubject (L, &argE); /* 2-nd upvalue: s */
lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */
lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */
- lua_pushcclosure (L, gmatch_iter, 4);
+ lua_pushinteger (L, -1); /* 5-th upvalue: last end of match */
+ lua_pushcclosure (L, gmatch_iter, 5);
return 1;
}
@@ -593,18 +624,17 @@ static int algf_split (lua_State *L)
{
TArgComp argC;
TArgExec argE;
- TUserdata *ud;
checkarg_gmatch_split (L, &argC, &argE);
- if (argC.ud) {
- ud = (TUserdata*) argC.ud;
+ if (argC.ud)
lua_pushvalue (L, 2);
- }
- else compile_regex (L, &argC, &ud); /* 1-st upvalue: ud */
+ else
+ compile_regex (L, &argC, NULL); /* 1-st upvalue: ud */
gmatch_pushsubject (L, &argE); /* 2-nd upvalue: s */
lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */
lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */
lua_pushinteger (L, 0); /* 5-th upvalue: incr */
- lua_pushcclosure (L, split_iter, 5);
+ lua_pushinteger (L, -1); /* 6-th upvalue: last_end */
+ lua_pushcclosure (L, split_iter, 6);
return 1;
}
diff --git a/test/common_sets.lua b/test/common_sets.lua
index 828d077..b259efc 100644
--- a/test/common_sets.lua
+++ b/test/common_sets.lua
@@ -33,7 +33,7 @@ local function set_f_gmatch (lib, flg)
--{ subj patt results }
{ {"ab", lib.new"."}, {{"a",N}, {"b",N} } },
{ {("abcd"):rep(3), "(.)b.(d)"}, {{"a","d"},{"a","d"},{"a","d"}} },
- { {"abcd", ".*" }, {{"abcd",N},{"",N} } },--zero-length match
+ { {"abcd", ".*" }, {{"abcd",N} } },--zero-length match
{ {"abc", "^." }, {{"a",N}} },--anchored pattern
}
end
@@ -45,7 +45,7 @@ local function set_f_count (lib, flg)
--{ subj patt results }
{ {"ab", lib.new"."}, { 2 } },
{ {("abcd"):rep(3), "(.)b.(d)"}, { 3 } },
- { {"abcd", ".*" }, { 2 } },
+ { {"abcd", ".*" }, { 1 } },
{ {"abc", "^." }, { 1 } },
}
end
@@ -229,7 +229,7 @@ local function set_f_gsub4 (lib, flg)
--{ s, p, f, n, res1, res2, res3 },
{ {"a2c3", ".", "#" }, {"####", 4, 4} }, -- test .
{ {"a2c3", ".+", "#" }, {"#", 1, 1} }, -- test .+
- { {"a2c3", ".*", "#" }, {"##", 2, 2} }, -- test .*
+ { {"a2c3", ".*", "#" }, {"#", 1, 1} }, -- test .*
{ {"/* */ */", "\\/\\*(.*)\\*\\/", "#" }, {"#", 1, 1} },
{ {"a2c3", "[0-9]", "#" }, {"a#c#", 2, 2} }, -- test %d
{ {"a2c3", "[^0-9]", "#" }, {"#2#3", 2, 2} }, -- test %D
diff --git a/test/oniguruma_sets.lua b/test/oniguruma_sets.lua
index 374a48e..164de95 100644
--- a/test/oniguruma_sets.lua
+++ b/test/oniguruma_sets.lua
@@ -58,7 +58,7 @@ end
local function set_f_gmatch (lib, flg)
-- gmatch (s, p, [cf], [ef])
- local pCSV = "(^[^,]*)|,([^,]*)"
+ local pCSV = "[^,]*"
local F = false
local function test_gmatch (subj, patt)
local out, guard = {}, 10
@@ -72,13 +72,15 @@ local function set_f_gmatch (lib, flg)
return {
Name = "Function gmatch",
Func = test_gmatch,
- --{ subj patt results }
- { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj
- { {"", pCSV}, {{"",F}} },
- { {"12", pCSV}, {{"12",F}} },
- ----{ {",", pCSV}, {{"", F},{F,""}} },
- { {"12,,45", pCSV}, {{"12",F},{F,""},{F,"45"}} },
- ----{ {",,12,45,,ab,", pCSV}, {{"",F},{F,""},{F,"12"},{F,"45"},{F,""},{F,"ab"},{F,""}} },
+ --{ subj patt results }
+ { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj
+ { {"", pCSV}, {{"",N}} },
+ { {"12", pCSV}, {{"12",N}} },
+ { {",", pCSV}, {{"", N},{"", N}} },
+ { {"12,,45", pCSV}, {{"12",N},{"",N},{"45",N}} },
+ { {",,12,45,,ab,", pCSV}, {{"",N},{"",N},{"12",N},{"45",N},{"",N},{"ab",N},{"",N}} },
+ { {"12345", "(.)(.)"}, {{"1","2"},{"3","4"}} },
+ { {"12345", "(.)(.?)"}, {{"1","2"},{"3","4"},{"5",""}} },
}
end
@@ -98,10 +100,13 @@ local function set_f_split (lib, flg)
Func = test_split,
--{ subj patt results }
{ {"a,\0,c", ","}, {{"a",",",N},{"\0",",",N},{"c",N,N}, } },--nuls in subj
- { {"ab", "$"}, {{"ab","",N}, {"",N,N}, } },
- { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } },
- { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N},{"",N,N}, } },
- { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } },
+ { {"ab", "$"}, {{"ab","",N}, {"",N,N} } },
+ { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N} } },
+ { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N}, {"",N,N} } },
+ { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N} } },
+ { {"ab", ".*" }, {{"","ab",N}, {"",N,N} } },
+ { {"ab", ".*?" }, {{"","",N}, {"a","",N}, {"b","",N}, {"",N,N} } },
+ { {"ab;de", ";*" }, {{"","",N},{"a","",N},{"b",";",N},{"d","",N},{"e","",N},{"",N,N} }},
}
end
diff --git a/test/pcre_sets.lua b/test/pcre_sets.lua
index 3a19b9d..15ca275 100644
--- a/test/pcre_sets.lua
+++ b/test/pcre_sets.lua
@@ -61,7 +61,7 @@ end
local function set_f_gmatch (lib, flg)
-- gmatch (s, p, [cf], [ef])
- local pCSV = "(^[^,]*)|,([^,]*)"
+ local pCSV = "[^,]*"
local F = false
local function test_gmatch (subj, patt)
local out, guard = {}, 10
@@ -75,13 +75,15 @@ local function set_f_gmatch (lib, flg)
return {
Name = "Function gmatch",
Func = test_gmatch,
- --{ subj patt results }
- { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj
- { {"", pCSV}, {{"",F}} },
- { {"12", pCSV}, {{"12",F}} },
- { {",", pCSV}, {{"", F}} },
- { {"12,,45", pCSV}, {{"12",F},{F,""},{F,"45"}} },
- { {",,12,45,,ab,", pCSV}, {{"",F},{F,"12"},{F,"45"},{F,""},{F,"ab"},{F,""}} },
+ --{ subj patt results }
+ { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj
+ { {"", pCSV}, {{"",N}} },
+ { {"12", pCSV}, {{"12",N}} },
+ { {",", pCSV}, {{"", N},{"", N}} },
+ { {"12,,45", pCSV}, {{"12",N},{"",N},{"45",N}} },
+ { {",,12,45,,ab,", pCSV}, {{"",N},{"",N},{"12",N},{"45",N},{"",N},{"ab",N},{"",N}} },
+ { {"12345", "(.)(.)"}, {{"1","2"},{"3","4"}} },
+ { {"12345", "(.)(.?)"}, {{"1","2"},{"3","4"},{"5",""}} },
}
end
@@ -101,10 +103,13 @@ local function set_f_split (lib, flg)
Func = test_split,
--{ subj patt results }
{ {"a,\0,c", ","}, {{"a",",",N},{"\0",",",N},{"c",N,N}, } },--nuls in subj
- { {"ab", "$"}, {{"ab","",N}, {"",N,N}, } },
- { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } },
- { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N},{"",N,N}, } },
- { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } },
+ { {"ab", "$"}, {{"ab","",N}, {"",N,N} } },
+ { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N} } },
+ { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N}, {"",N,N} } },
+ { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N} } },
+ { {"ab", ".*" }, {{"","ab",N}, {"",N,N} } },
+ { {"ab", ".*?" }, {{"","",N}, {"a","",N}, {"b","",N}, {"",N,N} } },
+ { {"ab;de", ";*" }, {{"","",N},{"a","",N},{"b",";",N},{"d","",N},{"e","",N},{"",N,N} }},
}
end
diff --git a/windows/mingw/_mingw.mak b/windows/mingw/_mingw.mak
index 224c7fa..bb6e5f0 100644
--- a/windows/mingw/_mingw.mak
+++ b/windows/mingw/_mingw.mak
@@ -6,27 +6,24 @@ VERSION = 2.7.2
# Target Lua version (51 for Lua 5.1; 52 for Lua 5.2).
LUAVERSION = 51
+LUADOTVERSION = $(subst 5,5.,$(LUAVERSION))
# INSTALLPATH : Path to install the built DLL.
# LUADLL : Name of Lua DLL to link to (.dll should be omitted).
# LUAEXE : Name of Lua interpreter.
# LUAINC : Path of Lua include files.
-# LIBPATH : Path of lua5.1.dll, lua52.dll, pcre.dll, etc.
+# LIBPATH : Path of lua51.dll, lua52.dll, pcre.dll, etc.
-LIBPATH = c:\exe32
+INSTALLPATH = s:\exe\lib32\lua\$(LUADOTVERSION)
+LUADLL = lua$(LUAVERSION)
+LUAINC = s:\progr\work\system\include\lua\$(LUADOTVERSION)
+LIBPATH = c:\exe32
ifeq ($(LUAVERSION),51)
- INSTALLPATH = s:\exe\lib32\lua\5.1
- LUADLL = lua5.1
LUAEXE = lua.exe
- LUAINC = s:\progr\work\system\include\lua\5.1
- MYCFLAGS += -DREX_CREATEGLOBALVAR
+ CREATEGLOBAL = -DREX_CREATEGLOBALVAR
else
- INSTALLPATH = s:\exe\lib32\lua\5.2
- LUADLL = lua52
- LUAEXE = lua52.exe
- LUAINC = s:\progr\work\system\include\lua\5.2
-# MYCFLAGS += -DREX_CREATEGLOBALVAR
+ LUAEXE = lua$(LUAVERSION).exe
endif
# --------------------------------------------------------------------------
@@ -34,8 +31,11 @@ endif
BIN = $(PROJECT).dll
BININSTALL = $(INSTALLPATH)\$(BIN)
CC = mingw32-gcc
+AR = ar rcu
+RANLIB = ranlib
CFLAGS = -W -Wall -O2 $(INCS) -DREX_OPENLIB=luaopen_$(PROJECT) \
- -DREX_LIBNAME=\"$(PROJECT)\" -DVERSION=\"$(VERSION)\" $(MYCFLAGS)
+ -DREX_LIBNAME=\"$(PROJECT)\" -DVERSION=\"$(VERSION)\" \
+ $(CREATEGLOBAL) $(MYCFLAGS)
DEFFILE = $(PROJECT).def
EXPORTED = luaopen_$(PROJECT)
INCS = -I$(LUAINC) $(MYINCS)
@@ -61,6 +61,10 @@ test:
$(BIN): $(OBJ) $(DEFFILE)
$(CC) $(DEFFILE) $(OBJ) -L$(LIBPATH) $(LIBS) -o $@ -shared
+lib$(PROJECT)$(LUAVERSION).a: $(OBJ)
+ $(AR) $@ $?
+ $(RANLIB) $@
+
$(DEFFILE):
echo EXPORTS > $@
for %%d in ($(EXPORTED)) do echo %%d>> $@