summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShmuel Zeigerman <solomuz0@gmail.com>2014-08-08 18:48:26 +0300
committerShmuel Zeigerman <solomuz0@gmail.com>2014-08-08 18:48:26 +0300
commitc3ef7559e106053cfa27a927254feb56e80bd6a3 (patch)
tree1cd34f3afd88c5390e4f80e088eeceb74e24e4fb
parent21b5b1b3ba1069f95e446552a2dccdc3943211a7 (diff)
downloadlrexlib-c3ef7559e106053cfa27a927254feb56e80bd6a3.tar.gz
Add function rex.count and disallow nil/false as the replacement argument in rex.gsub (issue #10).
-rw-r--r--doc/manual.txt74
-rw-r--r--src/algo.h82
-rw-r--r--src/gnu/lgnu.c1
-rw-r--r--src/oniguruma/lonig.c1
-rw-r--r--src/pcre/lpcre.c1
-rw-r--r--src/posix/lposix.c1
-rw-r--r--src/tre/ltre.c1
-rw-r--r--src/tre/ltre_w.c1
-rw-r--r--test/common_sets.lua13
-rw-r--r--windows/mingw/rex_onig.mak2
10 files changed, 141 insertions, 36 deletions
diff --git a/doc/manual.txt b/doc/manual.txt
index f7d063c..50397b2 100644
--- a/doc/manual.txt
+++ b/doc/manual.txt
@@ -223,25 +223,24 @@ This function searches for all matches of the pattern *patt* in the string
*subj* and replaces them according to the parameters *repl* and *n* (see details
below).
- +---------+-----------------------------------+-------------------------+-------------+
- |Parameter| Description | Type |Default Value|
- +=========+===================================+=========================+=============+
- | subj |subject | string | n/a |
- +---------+-----------------------------------+-------------------------+-------------+
- | patt |regular expression pattern |string or userdata | n/a |
- +---------+-----------------------------------+-------------------------+-------------+
- | repl |substitution source |string, function, table, | n/a |
- | | |``false`` or ``nil`` | |
- +---------+-----------------------------------+-------------------------+-------------+
- | [n] |maximum number of matches to search| number or function | ``nil`` |
- | |for, or control function, or nil | | |
- +---------+-----------------------------------+-------------------------+-------------+
- | [cf] |compilation flags (bitwise OR) | number | cf_ |
- +---------+-----------------------------------+-------------------------+-------------+
- | [ef] |execution flags (bitwise OR) | number | ef_ |
- +---------+-----------------------------------+-------------------------+-------------+
- |[larg...]|library-specific arguments | | |
- +---------+-----------------------------------+-------------------------+-------------+
+ +---------+-----------------------------------+--------------------------+-------------+
+ |Parameter| Description | Type |Default Value|
+ +=========+===================================+==========================+=============+
+ | subj |subject | string | n/a |
+ +---------+-----------------------------------+--------------------------+-------------+
+ | patt |regular expression pattern |string or userdata | n/a |
+ +---------+-----------------------------------+--------------------------+-------------+
+ | repl |substitution source |string, function or table | n/a |
+ +---------+-----------------------------------+--------------------------+-------------+
+ | [n] |maximum number of matches to search| number or function | ``nil`` |
+ | |for, or control function, or nil | | |
+ +---------+-----------------------------------+--------------------------+-------------+
+ | [cf] |compilation flags (bitwise OR) | number | cf_ |
+ +---------+-----------------------------------+--------------------------+-------------+
+ | [ef] |execution flags (bitwise OR) | number | ef_ |
+ +---------+-----------------------------------+--------------------------+-------------+
+ |[larg...]|library-specific arguments | | |
+ +---------+-----------------------------------+--------------------------+-------------+
**Returns:**
1. The subject string with the substitutions made.
@@ -249,9 +248,9 @@ below).
3. Number of substitutions made.
**Details:**
- The parameter *repl* can be either a string, a function, a table,
- ``false`` or ``nil``. On each match made, it is converted into a
- value *repl_out* that may be used for the replacement.
+ The parameter *repl* can be either a string, a function or a table.
+ On each match made, it is converted into a value *repl_out* that may be used
+ for the replacement.
*repl_out* is generated differently depending on the type of *repl*:
@@ -290,11 +289,6 @@ below).
same rules as for the return value of *repl* call, described in the above
paragraph.
- 4. If *repl* is ``false`` or ``nil``, no replacement is done. Note
- that, unusually for Lua, if ``repl`` is absent, it is not taken
- to be ``nil``. This is to prevent programming errors caused by
- inadvertently missing out *repl*.
-
Note: Under some circumstances, the value of *repl_out* may be ignored; see
below_.
@@ -378,6 +372,32 @@ subject.
------------------------------------------------------------
+count
+-----
+
+:funcdef:`rex.count (subj, patt, [cf], [ef], [larg...])`
+
+This function counts matches of the pattern *patt* in the string *subj*.
+
+ +---------+-----------------------------------+--------------------------+-------------+
+ |Parameter| Description | Type |Default Value|
+ +=========+===================================+==========================+=============+
+ | subj |subject | string | n/a |
+ +---------+-----------------------------------+--------------------------+-------------+
+ | patt |regular expression pattern |string or userdata | n/a |
+ +---------+-----------------------------------+--------------------------+-------------+
+ | [cf] |compilation flags (bitwise OR) | number | cf_ |
+ +---------+-----------------------------------+--------------------------+-------------+
+ | [ef] |execution flags (bitwise OR) | number | ef_ |
+ +---------+-----------------------------------+--------------------------+-------------+
+ |[larg...]|library-specific arguments | | |
+ +---------+-----------------------------------+--------------------------+-------------+
+
+**Returns:**
+ 1. Number of matches found.
+
+------------------------------------------------------------
+
flags
-----
diff --git a/src/algo.h b/src/algo.h
index e5ac525..e4a98e1 100644
--- a/src/algo.h
+++ b/src/algo.h
@@ -173,10 +173,8 @@ static void checkarg_gsub (lua_State *L, TArgComp *argC, TArgExec *argE) {
lua_tostring (L, 3); /* converts number (if any) to string */
argE->reptype = lua_type (L, 3);
if (argE->reptype != LUA_TSTRING && argE->reptype != LUA_TTABLE &&
- argE->reptype != LUA_TFUNCTION && argE->reptype != LUA_TNIL &&
- (argE->reptype != LUA_TBOOLEAN ||
- (argE->reptype == LUA_TBOOLEAN && lua_toboolean (L, 3)))) {
- luaL_typerror (L, 3, "string, table, function, false or nil");
+ argE->reptype != LUA_TFUNCTION) {
+ luaL_typerror (L, 3, "string, table or function");
}
argE->funcpos = 3;
argE->funcpos2 = 4;
@@ -187,6 +185,16 @@ static void checkarg_gsub (lua_State *L, TArgComp *argC, TArgExec *argE) {
}
+/* function count (s, patt, [cf], [ef], [larg...]) -- fills *argC and *argE from the Lua arguments */
+static void checkarg_count (lua_State *L, TArgComp *argC, TArgExec *argE) {
+ check_subject (L, 1, argE); /* arg 1: subject */
+ check_pattern (L, 2, argC); /* arg 2: pattern */
+ argC->cflags = ALG_GETCFLAGS (L, 3); /* arg 3: compilation flags */
+ argE->eflags = luaL_optint (L, 4, ALG_EFLAGS_DFLT); /* arg 4: execution flags, ALG_EFLAGS_DFLT if omitted */
+ ALG_GETCARGS (L, 5, argC); /* starting at arg 5: library-specific arguments ([larg...]) */
+}
+
+
/* function find (s, patt, [st], [cf], [ef], [larg...]) */
/* function match (s, patt, [st], [cf], [ef], [larg...]) */
static void checkarg_find_func (lua_State *L, TArgComp *argC, TArgExec *argE) {
@@ -337,10 +345,6 @@ static int algf_gsub (lua_State *L) {
}
}
/*----------------------------------------------------------------*/
- else if (argE.reptype == LUA_TNIL || argE.reptype == LUA_TBOOLEAN) {
- buffer_addlstring (pBuf, argE.text + from, to - from);
- }
- /*----------------------------------------------------------------*/
if (argE.reptype == LUA_TTABLE || argE.reptype == LUA_TFUNCTION) {
if (lua_tostring (L, -1)) {
buffer_addvalue (pBuf, -1);
@@ -425,6 +429,68 @@ static int algf_gsub (lua_State *L) {
}
+static int algf_count (lua_State *L) { /* Lua-callable "count": pushes the number of matches found */
+ TUserdata *ud;
+ TArgComp argC;
+ TArgExec argE;
+ int n_match = 0, st = 0, retry; /* st: offset passed to the next search */
+ /*-- read arguments; take patt as userdata if given so, else compile --*/
+ checkarg_count (L, &argC, &argE);
+ if (argC.ud) {
+ ud = (TUserdata*) argC.ud;
+ lua_pushvalue (L, 2); /* push a copy of the pattern argument (arg 2) */
+ }
+ else compile_regex (L, &argC, &ud);
+ /*-- scan the subject; each pass that finds a match adds one ----------*/
+ SET_RETRY (retry, 0);
+ while (st <= (int)argE.textlen) { /* '<=': a search is still attempted at st == textlen */
+ int to, res;
+ res = GSUB_EXEC (ud, &argE, st, retry);
+ if (ALG_NOMATCH (res)) {
+#ifdef ALG_USERETRY
+ if (retry) {
+ if (st < (int)argE.textlen) { /* advance by 1 char and search again without retry */
+ st += ALG_CHARSIZE;
+ retry = 0;
+ continue;
+ }
+ }
+#endif
+ break;
+ }
+ else if (!ALG_ISMATCH (res)) { /* neither "no match" nor a match */
+ return generate_error (L, ud, res);
+ }
+ ++n_match;
+ to = ALG_BASE(st) + ALG_SUBEND(ud,0); /* presumably the end offset of the whole match -- confirm in backend macros */
+#ifdef ALG_PULL
+ {
+ int from = ALG_BASE(st) + ALG_SUBBEG(ud,0);
+ if (st < from)
+ st = from; /* never moves st backwards */
+ }
+#endif
+ /*-- decide where the next search starts ------------------------*/
+ if (st < to) {
+ st = to; /* continue from 'to' */
+ SET_RETRY (retry, 0);
+ }
+ else if (st < (int)argE.textlen) { /* here to <= st and st is not yet at the end of the subject */
+#ifdef ALG_USERETRY
+ retry = 1; /* st unchanged: next GSUB_EXEC call gets retry = 1 */
+#else
+ /* advance by 1 char */
+ st += ALG_CHARSIZE;
+#endif
+ }
+ else break;
+ }
+ /*-- one result: the match count ------------------------------------*/
+ lua_pushinteger (L, n_match);
+ return 1;
+}
+
+
static int finish_generic_find (lua_State *L, TUserdata *ud, TArgExec *argE,
int method, int res)
{
diff --git a/src/gnu/lgnu.c b/src/gnu/lgnu.c
index 54b73ec..a49560e 100644
--- a/src/gnu/lgnu.c
+++ b/src/gnu/lgnu.c
@@ -299,6 +299,7 @@ static const luaL_Reg r_functions[] = {
{ "find", algf_find },
{ "gmatch", algf_gmatch },
{ "gsub", algf_gsub },
+ { "count", algf_count },
{ "split", algf_split },
{ "new", algf_new },
{ "flags", Gnu_get_flags },
diff --git a/src/oniguruma/lonig.c b/src/oniguruma/lonig.c
index d622445..fa43afb 100644
--- a/src/oniguruma/lonig.c
+++ b/src/oniguruma/lonig.c
@@ -329,6 +329,7 @@ static const luaL_Reg r_functions[] = {
{ "find", algf_find },
{ "gmatch", algf_gmatch },
{ "gsub", algf_gsub },
+ { "count", algf_count },
{ "split", algf_split },
{ "new", algf_new },
{ "flags", LOnig_get_flags },
diff --git a/src/pcre/lpcre.c b/src/pcre/lpcre.c
index 87d1a56..7550ecf 100644
--- a/src/pcre/lpcre.c
+++ b/src/pcre/lpcre.c
@@ -392,6 +392,7 @@ static const luaL_Reg r_functions[] = {
{ "find", algf_find },
{ "gmatch", algf_gmatch },
{ "gsub", algf_gsub },
+ { "count", algf_count },
{ "split", algf_split },
{ "new", algf_new },
{ "flags", Lpcre_get_flags },
diff --git a/src/posix/lposix.c b/src/posix/lposix.c
index 52908d8..988a3aa 100644
--- a/src/posix/lposix.c
+++ b/src/posix/lposix.c
@@ -263,6 +263,7 @@ static const luaL_Reg r_functions[] = {
{ "find", algf_find },
{ "gmatch", algf_gmatch },
{ "gsub", algf_gsub },
+ { "count", algf_count },
{ "split", algf_split },
{ "new", algf_new },
{ "flags", Posix_get_flags },
diff --git a/src/tre/ltre.c b/src/tre/ltre.c
index d734434..6c9ed7c 100644
--- a/src/tre/ltre.c
+++ b/src/tre/ltre.c
@@ -331,6 +331,7 @@ static const luaL_Reg r_functions[] = {
{ "find", algf_find },
{ "gmatch", algf_gmatch },
{ "gsub", algf_gsub },
+ { "count", algf_count },
{ "match", algf_match },
{ "split", algf_split },
{ "config", Ltre_config },
diff --git a/src/tre/ltre_w.c b/src/tre/ltre_w.c
index c335346..0a6e370 100644
--- a/src/tre/ltre_w.c
+++ b/src/tre/ltre_w.c
@@ -212,6 +212,7 @@ static const luaL_Reg r_functions[] = {
{ "wfind", algf_find },
{ "wgmatch", algf_gmatch },
{ "wgsub", algf_gsub },
+ { "wcount", algf_count },
{ "wmatch", algf_match },
{ "wsplit", algf_split },
{ NULL, NULL }
diff --git a/test/common_sets.lua b/test/common_sets.lua
index 8489030..b8c0c7b 100644
--- a/test/common_sets.lua
+++ b/test/common_sets.lua
@@ -37,6 +37,18 @@ local function set_f_gmatch (lib, flg)
}
end
+local function set_f_count (lib, flg) -- builds the test set for lib.count; flg is not used in this body
+ return {
+ Name = "Function count",
+ Func = lib.count,
+ --{ {subj, patt}, {expected count} }
+ { {"ab", lib.new"."}, { 2 } }, -- pattern built with lib.new rather than given as a string
+ { {("abcd"):rep(3), "(.)b.(d)"}, { 3 } }, -- subject is "abcd" repeated 3 times
+ { {"abcd", ".*" }, { 2 } }, -- presumably the whole subject plus an empty match at its end
+ { {"abc", "^." }, { 1 } }, -- pattern anchored with ^
+ }
+end
+
local function set_f_split (lib, flg)
-- split (s, p, [cf], [ef])
local function test_split (subj, patt)
@@ -308,6 +320,7 @@ return function (libname)
set_m_tfind (lib),
set_m_find (lib),
set_m_match (lib),
+ set_f_count (lib),
set_f_gsub1 (lib),
set_f_gsub2 (lib),
set_f_gsub3 (lib),
diff --git a/windows/mingw/rex_onig.mak b/windows/mingw/rex_onig.mak
index deaeb40..5ce7b24 100644
--- a/windows/mingw/rex_onig.mak
+++ b/windows/mingw/rex_onig.mak
@@ -10,7 +10,7 @@ MYINCS = -I$(REGEXINC)
MYLIBS = -lonig -Wl,--enable-auto-import
OBJ = lonig.o lonig_f.o common.o
PROJDIR = oniguruma
-TESTNAME = onig
+TESTNAME = oniguruma
include _mingw.mak