diff options
author | Shmuel Zeigerman <solomuz0@gmail.com> | 2015-02-26 19:52:07 +0200 |
---|---|---|
committer | Shmuel Zeigerman <solomuz0@gmail.com> | 2015-02-26 19:52:07 +0200 |
commit | 0684b19e44ea5e937791a051133d31eb06c27292 (patch) | |
tree | 55a30cf02d9255b555505b7374b95423d696c68b | |
parent | a5c2a17019a42cccc8a5163bb8dc57af6f17f61d (diff) | |
download | lrexlib-0684b19e44ea5e937791a051133d31eb06c27292.tar.gz |
In the functions that search for multiple matches (gmatch, gsub, split, count), every empty match adjacent to the previous match is now discarded.
-rw-r--r-- | doc/manual.txt | 4 | ||||
-rw-r--r-- | src/algo.h | 136 | ||||
-rw-r--r-- | test/common_sets.lua | 6 | ||||
-rw-r--r-- | test/oniguruma_sets.lua | 29 | ||||
-rw-r--r-- | test/pcre_sets.lua | 29 | ||||
-rw-r--r-- | windows/mingw/_mingw.mak | 28 |
6 files changed, 140 insertions, 92 deletions
diff --git a/doc/manual.txt b/doc/manual.txt index 5fec66a..9a49bc1 100644 --- a/doc/manual.txt +++ b/doc/manual.txt @@ -88,6 +88,10 @@ Notes 9. The notation *larg...* is used to indicate optional library-specific arguments, which are documented in the ``new`` method of each library. +10. In the functions searching for multiple matches (``gmatch``, ``gsub``, + ``split``, ``count``) every empty match adjacent to the previous match + is discarded, e.g. ``rex.count("abc",".*")`` will return 1. + ------------------------------------------------------------ Functions and methods common to all bindings @@ -228,7 +228,7 @@ static int algf_gsub (lua_State *L) { TUserdata *ud; TArgComp argC; TArgExec argE; - int n_match = 0, n_subst = 0, st = 0; + int n_match = 0, n_subst = 0, st = 0, last_to = -1; TBuffer BufOut, BufRep, BufTemp, *pBuf = &BufOut; TFreeList freelist; /*------------------------------------------------------------------*/ @@ -262,9 +262,18 @@ static int algf_gsub (lua_State *L) { freelist_free (&freelist); return generate_error (L, ud, res); } - ++n_match; from = ALG_BASE(st) + ALG_SUBBEG(ud,0); to = ALG_BASE(st) + ALG_SUBEND(ud,0); + if (to == last_to) { /* discard an empty match adjacent to the previous match */ + if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */ + buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE); + st += ALG_CHARSIZE; + continue; + } + break; + } + last_to = to; + ++n_match; if (st < from) { buffer_addlstring (&BufOut, argE.text + st, from - st); #ifdef ALG_PULL @@ -392,7 +401,7 @@ static int algf_count (lua_State *L) { TUserdata *ud; TArgComp argC; TArgExec argE; - int n_match = 0, st = 0; + int n_match = 0, st = 0, last_to = -1; /*------------------------------------------------------------------*/ checkarg_count (L, &argC, &argE); if (argC.ud) { @@ -410,8 +419,16 @@ static int algf_count (lua_State *L) { else if (!ALG_ISMATCH (res)) { return generate_error (L, ud, res); } - ++n_match; to = ALG_BASE(st) + 
ALG_SUBEND(ud,0); + if (to == last_to) { /* discard an empty match adjacent to the previous match */ + if (st < (int)argE.textlen) { /* advance by 1 char */ + st += ALG_CHARSIZE; + continue; + } + break; + } + last_to = to; + ++n_match; #ifdef ALG_PULL { int from = ALG_BASE(st) + ALG_SUBBEG(ud,0); @@ -487,24 +504,32 @@ static int algf_match (lua_State *L) { static int gmatch_iter (lua_State *L) { + int last_end, res; TArgExec argE; TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1)); argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen); argE.eflags = lua_tointeger (L, lua_upvalueindex (3)); argE.startoffset = lua_tointeger (L, lua_upvalueindex (4)); - - if (argE.startoffset > (int)argE.textlen) - return 0; + last_end = lua_tointeger (L, lua_upvalueindex (5)); while (1) { - int res = gmatch_exec (ud, &argE); + if (argE.startoffset > (int)argE.textlen) + return 0; + res = gmatch_exec (ud, &argE); if (ALG_ISMATCH (res)) { int incr = 0; if (!ALG_SUBLEN(ud,0)) { /* no progress: prevent endless loop */ + if (last_end == ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0)) { + argE.startoffset += ALG_CHARSIZE; + continue; + } incr = ALG_CHARSIZE; } - lua_pushinteger(L, ALG_BASE(argE.startoffset) + incr + ALG_SUBEND(ud,0)); /* update start offset */ + last_end = ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0); + lua_pushinteger(L, last_end + incr); /* update start offset */ lua_replace (L, lua_upvalueindex (4)); + lua_pushinteger(L, last_end); /* update last end of match */ + lua_replace (L, lua_upvalueindex (5)); /* push either captures or entire match */ if (ALG_NSUB(ud)) { push_substrings (L, ud, argE.text, NULL); @@ -515,9 +540,8 @@ static int gmatch_iter (lua_State *L) { return 1; } } - else if (ALG_NOMATCH (res)) { + else if (ALG_NOMATCH (res)) return 0; - } else return generate_error (L, ud, res); } @@ -525,47 +549,55 @@ static int gmatch_iter (lua_State *L) { static int split_iter (lua_State *L) { - int incr, newoffset, res; + int incr, 
last_end, newoffset, res; TArgExec argE; TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1)); argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen); argE.eflags = lua_tointeger (L, lua_upvalueindex (3)); argE.startoffset = lua_tointeger (L, lua_upvalueindex (4)); incr = lua_tointeger (L, lua_upvalueindex (5)); + last_end = lua_tointeger (L, lua_upvalueindex (6)); - if (argE.startoffset > (int)argE.textlen) + if (incr < 0) return 0; - if ((newoffset = argE.startoffset + incr) > (int)argE.textlen) - goto nomatch; - - res = split_exec (ud, &argE, newoffset); - if (ALG_ISMATCH (res)) { - lua_pushinteger(L, ALG_BASE(newoffset) + ALG_SUBEND(ud,0)); /* update start offset */ - lua_replace (L, lua_upvalueindex (4)); - lua_pushinteger (L, ALG_SUBLEN(ud,0) ? 0 : ALG_CHARSIZE); /* update incr */ - lua_replace (L, lua_upvalueindex (5)); - /* push text preceding the match */ - lua_pushlstring (L, argE.text + argE.startoffset, - ALG_SUBBEG(ud,0) + ALG_BASE(newoffset) - argE.startoffset); - /* push either captures or entire match */ - if (ALG_NSUB(ud)) { - push_substrings (L, ud, argE.text + ALG_BASE(newoffset), NULL); - return 1 + ALG_NSUB(ud); - } - else { - ALG_PUSHSUB (L, ud, argE.text + ALG_BASE(newoffset), 0); - return 2; + while (1) { + if ((newoffset = argE.startoffset + incr) > (int)argE.textlen) + break; + res = split_exec (ud, &argE, newoffset); + if (ALG_ISMATCH (res)) { + if (!ALG_SUBLEN(ud,0)) { /* no progress: prevent endless loop */ + if (last_end == ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0)) { + incr += ALG_CHARSIZE; + continue; + } + } + lua_pushinteger(L, ALG_BASE(newoffset) + ALG_SUBEND(ud,0)); /* update start offset and last_end */ + lua_pushvalue (L, -1); + lua_replace (L, lua_upvalueindex (4)); + lua_replace (L, lua_upvalueindex (6)); + lua_pushinteger (L, ALG_SUBLEN(ud,0) ? 
0 : ALG_CHARSIZE); /* update incr */ + lua_replace (L, lua_upvalueindex (5)); + /* push text preceding the match */ + lua_pushlstring (L, argE.text + argE.startoffset, + ALG_SUBBEG(ud,0) + ALG_BASE(newoffset) - argE.startoffset); + /* push either captures or entire match */ + if (ALG_NSUB(ud)) { + push_substrings (L, ud, argE.text + ALG_BASE(newoffset), NULL); + return 1 + ALG_NSUB(ud); + } + else { + ALG_PUSHSUB (L, ud, argE.text + ALG_BASE(newoffset), 0); + return 2; + } } + else if (ALG_NOMATCH (res)) + break; + else + return generate_error (L, ud, res); } - else if (ALG_NOMATCH (res)) - goto nomatch; - else - return generate_error (L, ud, res); - -nomatch: - lua_pushinteger (L, argE.textlen + 1); /* mark as last iteration */ - lua_replace (L, lua_upvalueindex (4)); /* update start offset */ + lua_pushinteger (L, -1); /* mark as last iteration */ + lua_replace (L, lua_upvalueindex (5)); /* incr = -1 */ lua_pushlstring (L, argE.text+argE.startoffset, argE.textlen-argE.startoffset); return 1; } @@ -575,17 +607,16 @@ static int algf_gmatch (lua_State *L) { TArgComp argC; TArgExec argE; - TUserdata *ud; checkarg_gmatch_split (L, &argC, &argE); - if (argC.ud) { - ud = (TUserdata*) argC.ud; + if (argC.ud) lua_pushvalue (L, 2); - } - else compile_regex (L, &argC, &ud); /* 1-st upvalue: ud */ + else + compile_regex (L, &argC, NULL); /* 1-st upvalue: ud */ gmatch_pushsubject (L, &argE); /* 2-nd upvalue: s */ lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */ lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */ - lua_pushcclosure (L, gmatch_iter, 4); + lua_pushinteger (L, -1); /* 5-th upvalue: last end of match */ + lua_pushcclosure (L, gmatch_iter, 5); return 1; } @@ -593,18 +624,17 @@ static int algf_split (lua_State *L) { TArgComp argC; TArgExec argE; - TUserdata *ud; checkarg_gmatch_split (L, &argC, &argE); - if (argC.ud) { - ud = (TUserdata*) argC.ud; + if (argC.ud) lua_pushvalue (L, 2); - } - else compile_regex (L, &argC, &ud); /* 1-st upvalue: ud */ + else 
+ compile_regex (L, &argC, NULL); /* 1-st upvalue: ud */ gmatch_pushsubject (L, &argE); /* 2-nd upvalue: s */ lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */ lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */ lua_pushinteger (L, 0); /* 5-th upvalue: incr */ - lua_pushcclosure (L, split_iter, 5); + lua_pushinteger (L, -1); /* 6-th upvalue: last_end */ + lua_pushcclosure (L, split_iter, 6); return 1; } diff --git a/test/common_sets.lua b/test/common_sets.lua index 828d077..b259efc 100644 --- a/test/common_sets.lua +++ b/test/common_sets.lua @@ -33,7 +33,7 @@ local function set_f_gmatch (lib, flg) --{ subj patt results } { {"ab", lib.new"."}, {{"a",N}, {"b",N} } }, { {("abcd"):rep(3), "(.)b.(d)"}, {{"a","d"},{"a","d"},{"a","d"}} }, - { {"abcd", ".*" }, {{"abcd",N},{"",N} } },--zero-length match + { {"abcd", ".*" }, {{"abcd",N} } },--zero-length match { {"abc", "^." }, {{"a",N}} },--anchored pattern } end @@ -45,7 +45,7 @@ local function set_f_count (lib, flg) --{ subj patt results } { {"ab", lib.new"."}, { 2 } }, { {("abcd"):rep(3), "(.)b.(d)"}, { 3 } }, - { {"abcd", ".*" }, { 2 } }, + { {"abcd", ".*" }, { 1 } }, { {"abc", "^." }, { 1 } }, } end @@ -229,7 +229,7 @@ local function set_f_gsub4 (lib, flg) --{ s, p, f, n, res1, res2, res3 }, { {"a2c3", ".", "#" }, {"####", 4, 4} }, -- test . 
{ {"a2c3", ".+", "#" }, {"#", 1, 1} }, -- test .+ - { {"a2c3", ".*", "#" }, {"##", 2, 2} }, -- test .* + { {"a2c3", ".*", "#" }, {"#", 1, 1} }, -- test .* { {"/* */ */", "\\/\\*(.*)\\*\\/", "#" }, {"#", 1, 1} }, { {"a2c3", "[0-9]", "#" }, {"a#c#", 2, 2} }, -- test %d { {"a2c3", "[^0-9]", "#" }, {"#2#3", 2, 2} }, -- test %D diff --git a/test/oniguruma_sets.lua b/test/oniguruma_sets.lua index 374a48e..164de95 100644 --- a/test/oniguruma_sets.lua +++ b/test/oniguruma_sets.lua @@ -58,7 +58,7 @@ end local function set_f_gmatch (lib, flg) -- gmatch (s, p, [cf], [ef]) - local pCSV = "(^[^,]*)|,([^,]*)" + local pCSV = "[^,]*" local F = false local function test_gmatch (subj, patt) local out, guard = {}, 10 @@ -72,13 +72,15 @@ local function set_f_gmatch (lib, flg) return { Name = "Function gmatch", Func = test_gmatch, - --{ subj patt results } - { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj - { {"", pCSV}, {{"",F}} }, - { {"12", pCSV}, {{"12",F}} }, - ----{ {",", pCSV}, {{"", F},{F,""}} }, - { {"12,,45", pCSV}, {{"12",F},{F,""},{F,"45"}} }, - ----{ {",,12,45,,ab,", pCSV}, {{"",F},{F,""},{F,"12"},{F,"45"},{F,""},{F,"ab"},{F,""}} }, + --{ subj patt results } + { {"a\0c", "." 
}, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj + { {"", pCSV}, {{"",N}} }, + { {"12", pCSV}, {{"12",N}} }, + { {",", pCSV}, {{"", N},{"", N}} }, + { {"12,,45", pCSV}, {{"12",N},{"",N},{"45",N}} }, + { {",,12,45,,ab,", pCSV}, {{"",N},{"",N},{"12",N},{"45",N},{"",N},{"ab",N},{"",N}} }, + { {"12345", "(.)(.)"}, {{"1","2"},{"3","4"}} }, + { {"12345", "(.)(.?)"}, {{"1","2"},{"3","4"},{"5",""}} }, } end @@ -98,10 +100,13 @@ local function set_f_split (lib, flg) Func = test_split, --{ subj patt results } { {"a,\0,c", ","}, {{"a",",",N},{"\0",",",N},{"c",N,N}, } },--nuls in subj - { {"ab", "$"}, {{"ab","",N}, {"",N,N}, } }, - { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } }, - { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N},{"",N,N}, } }, - { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } }, + { {"ab", "$"}, {{"ab","",N}, {"",N,N} } }, + { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N} } }, + { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N}, {"",N,N} } }, + { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N} } }, + { {"ab", ".*" }, {{"","ab",N}, {"",N,N} } }, + { {"ab", ".*?" }, {{"","",N}, {"a","",N}, {"b","",N}, {"",N,N} } }, + { {"ab;de", ";*" }, {{"","",N},{"a","",N},{"b",";",N},{"d","",N},{"e","",N},{"",N,N} }}, } end diff --git a/test/pcre_sets.lua b/test/pcre_sets.lua index 3a19b9d..15ca275 100644 --- a/test/pcre_sets.lua +++ b/test/pcre_sets.lua @@ -61,7 +61,7 @@ end local function set_f_gmatch (lib, flg) -- gmatch (s, p, [cf], [ef]) - local pCSV = "(^[^,]*)|,([^,]*)" + local pCSV = "[^,]*" local F = false local function test_gmatch (subj, patt) local out, guard = {}, 10 @@ -75,13 +75,15 @@ local function set_f_gmatch (lib, flg) return { Name = "Function gmatch", Func = test_gmatch, - --{ subj patt results } - { {"a\0c", "." 
}, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj - { {"", pCSV}, {{"",F}} }, - { {"12", pCSV}, {{"12",F}} }, - { {",", pCSV}, {{"", F}} }, - { {"12,,45", pCSV}, {{"12",F},{F,""},{F,"45"}} }, - { {",,12,45,,ab,", pCSV}, {{"",F},{F,"12"},{F,"45"},{F,""},{F,"ab"},{F,""}} }, + --{ subj patt results } + { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj + { {"", pCSV}, {{"",N}} }, + { {"12", pCSV}, {{"12",N}} }, + { {",", pCSV}, {{"", N},{"", N}} }, + { {"12,,45", pCSV}, {{"12",N},{"",N},{"45",N}} }, + { {",,12,45,,ab,", pCSV}, {{"",N},{"",N},{"12",N},{"45",N},{"",N},{"ab",N},{"",N}} }, + { {"12345", "(.)(.)"}, {{"1","2"},{"3","4"}} }, + { {"12345", "(.)(.?)"}, {{"1","2"},{"3","4"},{"5",""}} }, } end @@ -101,10 +103,13 @@ local function set_f_split (lib, flg) Func = test_split, --{ subj patt results } { {"a,\0,c", ","}, {{"a",",",N},{"\0",",",N},{"c",N,N}, } },--nuls in subj - { {"ab", "$"}, {{"ab","",N}, {"",N,N}, } }, - { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } }, - { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N},{"",N,N}, } }, - { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } }, + { {"ab", "$"}, {{"ab","",N}, {"",N,N} } }, + { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N} } }, + { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N}, {"",N,N} } }, + { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N} } }, + { {"ab", ".*" }, {{"","ab",N}, {"",N,N} } }, + { {"ab", ".*?" }, {{"","",N}, {"a","",N}, {"b","",N}, {"",N,N} } }, + { {"ab;de", ";*" }, {{"","",N},{"a","",N},{"b",";",N},{"d","",N},{"e","",N},{"",N,N} }}, } end diff --git a/windows/mingw/_mingw.mak b/windows/mingw/_mingw.mak index 224c7fa..bb6e5f0 100644 --- a/windows/mingw/_mingw.mak +++ b/windows/mingw/_mingw.mak @@ -6,27 +6,24 @@ VERSION = 2.7.2 # Target Lua version (51 for Lua 5.1; 52 for Lua 5.2). LUAVERSION = 51 +LUADOTVERSION = $(subst 5,5.,$(LUAVERSION)) # INSTALLPATH : Path to install the built DLL. 
# LUADLL : Name of Lua DLL to link to (.dll should be omitted). # LUAEXE : Name of Lua interpreter. # LUAINC : Path of Lua include files. -# LIBPATH : Path of lua5.1.dll, lua52.dll, pcre.dll, etc. +# LIBPATH : Path of lua51.dll, lua52.dll, pcre.dll, etc. -LIBPATH = c:\exe32 +INSTALLPATH = s:\exe\lib32\lua\$(LUADOTVERSION) +LUADLL = lua$(LUAVERSION) +LUAINC = s:\progr\work\system\include\lua\$(LUADOTVERSION) +LIBPATH = c:\exe32 ifeq ($(LUAVERSION),51) - INSTALLPATH = s:\exe\lib32\lua\5.1 - LUADLL = lua5.1 LUAEXE = lua.exe - LUAINC = s:\progr\work\system\include\lua\5.1 - MYCFLAGS += -DREX_CREATEGLOBALVAR + CREATEGLOBAL = -DREX_CREATEGLOBALVAR else - INSTALLPATH = s:\exe\lib32\lua\5.2 - LUADLL = lua52 - LUAEXE = lua52.exe - LUAINC = s:\progr\work\system\include\lua\5.2 -# MYCFLAGS += -DREX_CREATEGLOBALVAR + LUAEXE = lua$(LUAVERSION).exe endif # -------------------------------------------------------------------------- @@ -34,8 +31,11 @@ endif BIN = $(PROJECT).dll BININSTALL = $(INSTALLPATH)\$(BIN) CC = mingw32-gcc +AR = ar rcu +RANLIB = ranlib CFLAGS = -W -Wall -O2 $(INCS) -DREX_OPENLIB=luaopen_$(PROJECT) \ - -DREX_LIBNAME=\"$(PROJECT)\" -DVERSION=\"$(VERSION)\" $(MYCFLAGS) + -DREX_LIBNAME=\"$(PROJECT)\" -DVERSION=\"$(VERSION)\" \ + $(CREATEGLOBAL) $(MYCFLAGS) DEFFILE = $(PROJECT).def EXPORTED = luaopen_$(PROJECT) INCS = -I$(LUAINC) $(MYINCS) @@ -61,6 +61,10 @@ test: $(BIN): $(OBJ) $(DEFFILE) $(CC) $(DEFFILE) $(OBJ) -L$(LIBPATH) $(LIBS) -o $@ -shared +lib$(PROJECT)$(LUAVERSION).a: $(OBJ) + $(AR) $@ $? + $(RANLIB) $@ + $(DEFFILE): echo EXPORTS > $@ for %%d in ($(EXPORTED)) do echo %%d>> $@ |