From c3ef7559e106053cfa27a927254feb56e80bd6a3 Mon Sep 17 00:00:00 2001 From: Shmuel Zeigerman Date: Fri, 8 Aug 2014 18:48:26 +0300 Subject: Add function rex.count and disallow nil/false as the replacement argument in rex.gsub (issue #10). --- doc/manual.txt | 74 ++++++++++++++++++++++++++--------------- src/algo.h | 82 +++++++++++++++++++++++++++++++++++++++++----- src/gnu/lgnu.c | 1 + src/oniguruma/lonig.c | 1 + src/pcre/lpcre.c | 1 + src/posix/lposix.c | 1 + src/tre/ltre.c | 1 + src/tre/ltre_w.c | 1 + test/common_sets.lua | 13 ++++++++ windows/mingw/rex_onig.mak | 2 +- 10 files changed, 141 insertions(+), 36 deletions(-) diff --git a/doc/manual.txt b/doc/manual.txt index f7d063c..50397b2 100644 --- a/doc/manual.txt +++ b/doc/manual.txt @@ -223,25 +223,24 @@ This function searches for all matches of the pattern *patt* in the string *subj* and replaces them according to the parameters *repl* and *n* (see details below). - +---------+-----------------------------------+-------------------------+-------------+ - |Parameter| Description | Type |Default Value| - +=========+===================================+=========================+=============+ - | subj |subject | string | n/a | - +---------+-----------------------------------+-------------------------+-------------+ - | patt |regular expression pattern |string or userdata | n/a | - +---------+-----------------------------------+-------------------------+-------------+ - | repl |substitution source |string, function, table, | n/a | - | | |``false`` or ``nil`` | | - +---------+-----------------------------------+-------------------------+-------------+ - | [n] |maximum number of matches to search| number or function | ``nil`` | - | |for, or control function, or nil | | | - +---------+-----------------------------------+-------------------------+-------------+ - | [cf] |compilation flags (bitwise OR) | number | cf_ | - +---------+-----------------------------------+-------------------------+-------------+ - | [ef] |execution flags (bitwise OR) | number | ef_ | - +---------+-----------------------------------+-------------------------+-------------+ - |[larg...]|library-specific arguments | | | - +---------+-----------------------------------+-------------------------+-------------+ + +---------+-----------------------------------+--------------------------+-------------+ + |Parameter| Description | Type |Default Value| + +=========+===================================+==========================+=============+ + | subj |subject | string | n/a | + +---------+-----------------------------------+--------------------------+-------------+ + | patt |regular expression pattern |string or userdata | n/a | + +---------+-----------------------------------+--------------------------+-------------+ + | repl |substitution source |string, function or table | n/a | + +---------+-----------------------------------+--------------------------+-------------+ + | [n] |maximum number of matches to search| number or function | ``nil`` | + | |for, or control function, or nil | | | + +---------+-----------------------------------+--------------------------+-------------+ + | [cf] |compilation flags (bitwise OR) | number | cf_ | + +---------+-----------------------------------+--------------------------+-------------+ + | [ef] |execution flags (bitwise OR) | number | ef_ | + +---------+-----------------------------------+--------------------------+-------------+ + |[larg...]|library-specific arguments | | | + +---------+-----------------------------------+--------------------------+-------------+ **Returns:** 1. The subject string with the substitutions made. @@ -249,9 +248,9 @@ below). 3. Number of substitutions made. **Details:** - The parameter *repl* can be either a string, a function, a table, - ``false`` or ``nil``. On each match made, it is converted into a - value *repl_out* that may be used for the replacement. + The parameter *repl* can be either a string, a function or a table. + On each match made, it is converted into a value *repl_out* that may be used + for the replacement. *repl_out* is generated differently depending on the type of *repl*: @@ -290,11 +289,6 @@ below). same rules as for the return value of *repl* call, described in the above paragraph. - 4. If *repl* is ``false`` or ``nil``, no replacement is done. Note - that, unusually for Lua, if ``repl`` is absent, it is not taken - to be ``nil``. This is to prevent programming errors caused by - inadvertently missing out *repl*. - Note: Under some circumstances, the value of *repl_out* may be ignored; see below_. @@ -378,6 +372,32 @@ subject. ------------------------------------------------------------ +count +----- + +:funcdef:`rex.count (subj, patt, [cf], [ef], [larg...])` + +This function counts matches of the pattern *patt* in the string *subj*. + + +---------+-----------------------------------+--------------------------+-------------+ + |Parameter| Description | Type |Default Value| + +=========+===================================+==========================+=============+ + | subj |subject | string | n/a | + +---------+-----------------------------------+--------------------------+-------------+ + | patt |regular expression pattern |string or userdata | n/a | + +---------+-----------------------------------+--------------------------+-------------+ + | [cf] |compilation flags (bitwise OR) | number | cf_ | + +---------+-----------------------------------+--------------------------+-------------+ + | [ef] |execution flags (bitwise OR) | number | ef_ | + +---------+-----------------------------------+--------------------------+-------------+ + |[larg...]|library-specific arguments | | | + +---------+-----------------------------------+--------------------------+-------------+ + +**Returns:** + 1. Number of matches found. + +------------------------------------------------------------ + flags ----- diff --git a/src/algo.h b/src/algo.h index e5ac525..e4a98e1 100644 --- a/src/algo.h +++ b/src/algo.h @@ -173,10 +173,8 @@ static void checkarg_gsub (lua_State *L, TArgComp *argC, TArgExec *argE) { lua_tostring (L, 3); /* converts number (if any) to string */ argE->reptype = lua_type (L, 3); if (argE->reptype != LUA_TSTRING && argE->reptype != LUA_TTABLE && - argE->reptype != LUA_TFUNCTION && argE->reptype != LUA_TNIL && - (argE->reptype != LUA_TBOOLEAN || - (argE->reptype == LUA_TBOOLEAN && lua_toboolean (L, 3)))) { - luaL_typerror (L, 3, "string, table, function, false or nil"); + argE->reptype != LUA_TFUNCTION) { + luaL_typerror (L, 3, "string, table or function"); } argE->funcpos = 3; argE->funcpos2 = 4; @@ -187,6 +185,16 @@ static void checkarg_gsub (lua_State *L, TArgComp *argC, TArgExec *argE) { } +/* function count (s, patt, [cf], [ef], [larg...]) */ +static void checkarg_count (lua_State *L, TArgComp *argC, TArgExec *argE) { + check_subject (L, 1, argE); + check_pattern (L, 2, argC); + argC->cflags = ALG_GETCFLAGS (L, 3); + argE->eflags = luaL_optint (L, 4, ALG_EFLAGS_DFLT); + ALG_GETCARGS (L, 5, argC); +} + + /* function find (s, patt, [st], [cf], [ef], [larg...]) */ /* function match (s, patt, [st], [cf], [ef], [larg...]) */ static void checkarg_find_func (lua_State *L, TArgComp *argC, TArgExec *argE) { @@ -337,10 +345,6 @@ static int algf_gsub (lua_State *L) { } } /*----------------------------------------------------------------*/ - else if (argE.reptype == LUA_TNIL || argE.reptype == LUA_TBOOLEAN) { - buffer_addlstring (pBuf, argE.text + from, to - from); - } - /*----------------------------------------------------------------*/ if (argE.reptype == LUA_TTABLE || argE.reptype == LUA_TFUNCTION) { if (lua_tostring (L, -1)) { buffer_addvalue (pBuf, -1); @@ -425,6 +429,68 @@ static int algf_gsub (lua_State *L) { } +static int algf_count (lua_State *L) { + TUserdata *ud; + TArgComp argC; + TArgExec argE; + int n_match = 0, st = 0, retry; + /*------------------------------------------------------------------*/ + checkarg_count (L, &argC, &argE); + if (argC.ud) { + ud = (TUserdata*) argC.ud; + lua_pushvalue (L, 2); + } + else compile_regex (L, &argC, &ud); + /*------------------------------------------------------------------*/ + SET_RETRY (retry, 0); + while (st <= (int)argE.textlen) { + int to, res; + res = GSUB_EXEC (ud, &argE, st, retry); + if (ALG_NOMATCH (res)) { +#ifdef ALG_USERETRY + if (retry) { + if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */ + st += ALG_CHARSIZE; + retry = 0; + continue; + } + } +#endif + break; + } + else if (!ALG_ISMATCH (res)) { + return generate_error (L, ud, res); + } + ++n_match; + to = ALG_BASE(st) + ALG_SUBEND(ud,0); +#ifdef ALG_PULL + { + int from = ALG_BASE(st) + ALG_SUBBEG(ud,0); + if (st < from) + st = from; + } +#endif + /*----------------------------------------------------------------*/ + if (st < to) { + st = to; + SET_RETRY (retry, 0); + } + else if (st < (int)argE.textlen) { +#ifdef ALG_USERETRY + retry = 1; +#else + /* advance by 1 char (not replaced) */ + st += ALG_CHARSIZE; +#endif + } + else break; + } + /*------------------------------------------------------------------*/ + lua_pushinteger (L, n_match); + return 1; +} + + static int finish_generic_find (lua_State *L, TUserdata *ud, TArgExec *argE, int method, int res) { diff --git a/src/gnu/lgnu.c b/src/gnu/lgnu.c index 54b73ec..a49560e 100644 --- a/src/gnu/lgnu.c +++ b/src/gnu/lgnu.c @@ -299,6 +299,7 @@ static const luaL_Reg r_functions[] = { { "find", algf_find }, { "gmatch", algf_gmatch }, { "gsub", algf_gsub }, + { "count", algf_count }, { "split", algf_split }, { "new", algf_new }, { "flags", Gnu_get_flags }, diff --git a/src/oniguruma/lonig.c b/src/oniguruma/lonig.c index d622445..fa43afb 100644 --- a/src/oniguruma/lonig.c +++ b/src/oniguruma/lonig.c @@ -329,6 +329,7 @@ static const luaL_Reg r_functions[] = { { "find", algf_find }, { "gmatch", algf_gmatch }, { "gsub", algf_gsub }, + { "count", algf_count }, { "split", algf_split }, { "new", algf_new }, { "flags", LOnig_get_flags }, diff --git a/src/pcre/lpcre.c b/src/pcre/lpcre.c index 87d1a56..7550ecf 100644 --- a/src/pcre/lpcre.c +++ b/src/pcre/lpcre.c @@ -392,6 +392,7 @@ static const luaL_Reg r_functions[] = { { "find", algf_find }, { "gmatch", algf_gmatch }, { "gsub", algf_gsub }, + { "count", algf_count }, { "split", algf_split }, { "new", algf_new }, { "flags", Lpcre_get_flags }, diff --git a/src/posix/lposix.c b/src/posix/lposix.c index 52908d8..988a3aa 100644 --- a/src/posix/lposix.c +++ b/src/posix/lposix.c @@ -263,6 +263,7 @@ static const luaL_Reg r_functions[] = { { "find", algf_find }, { "gmatch", algf_gmatch }, { "gsub", algf_gsub }, + { "count", algf_count }, { "split", algf_split }, { "new", algf_new }, { "flags", Posix_get_flags }, diff --git a/src/tre/ltre.c b/src/tre/ltre.c index d734434..6c9ed7c 100644 --- a/src/tre/ltre.c +++ b/src/tre/ltre.c @@ -331,6 +331,7 @@ static const luaL_Reg r_functions[] = { { "find", algf_find }, { "gmatch", algf_gmatch }, { "gsub", algf_gsub }, + { "count", algf_count }, { "match", algf_match }, { "split", algf_split }, { "config", Ltre_config }, diff --git a/src/tre/ltre_w.c b/src/tre/ltre_w.c index c335346..0a6e370 100644 --- a/src/tre/ltre_w.c +++ b/src/tre/ltre_w.c @@ -212,6 +212,7 @@ static const luaL_Reg r_functions[] = { { "wfind", algf_find }, { "wgmatch", algf_gmatch }, { "wgsub", algf_gsub }, + { "wcount", algf_count }, { "wmatch", algf_match }, { "wsplit", algf_split }, { NULL, NULL } diff --git a/test/common_sets.lua b/test/common_sets.lua index 8489030..b8c0c7b 100644 --- a/test/common_sets.lua +++ b/test/common_sets.lua @@ -37,6 +37,18 @@ local function set_f_gmatch (lib, flg) } end +local function set_f_count (lib, flg) + return { + Name = "Function count", + Func = lib.count, + --{ subj patt results } + { {"ab", lib.new"."}, { 2 } }, + { {("abcd"):rep(3), "(.)b.(d)"}, { 3 } }, + { {"abcd", ".*" }, { 2 } }, + { {"abc", "^." }, { 1 } }, + } +end + local function set_f_split (lib, flg) -- split (s, p, [cf], [ef]) local function test_split (subj, patt) @@ -308,6 +320,7 @@ return function (libname) set_m_tfind (lib), set_m_find (lib), set_m_match (lib), + set_f_count (lib), set_f_gsub1 (lib), set_f_gsub2 (lib), set_f_gsub3 (lib), diff --git a/windows/mingw/rex_onig.mak b/windows/mingw/rex_onig.mak index deaeb40..5ce7b24 100644 --- a/windows/mingw/rex_onig.mak +++ b/windows/mingw/rex_onig.mak @@ -10,7 +10,7 @@ MYINCS = -I$(REGEXINC) MYLIBS = -lonig -Wl,--enable-auto-import OBJ = lonig.o lonig_f.o common.o PROJDIR = oniguruma -TESTNAME = onig +TESTNAME = oniguruma include _mingw.mak -- cgit v1.2.1