summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorshmuz <shmuz>2007-03-17 11:09:32 +0000
committershmuz <shmuz>2007-03-17 11:09:32 +0000
commitce25d628c034b51c8d03e890e876a89601960e03 (patch)
tree98a7f155c4f9b2c8307b1058530ff90d4158282a
parent1c42e04250bfdf6cbd6e91cdc68d7ba999a7b650 (diff)
downloadlrexlib-ce25d628c034b51c8d03e890e876a89601960e03.tar.gz
many changes: see ChangeLog
-rwxr-xr-xChangeLog230
-rwxr-xr-xLICENSE2
-rwxr-xr-xREADME2
-rwxr-xr-xdoc/index.txt2
-rwxr-xr-xdoc/license.html2
-rwxr-xr-xdoc/manual.html118
-rwxr-xr-xdoc/manual.txt97
-rwxr-xr-xsrc/common.c56
-rwxr-xr-xsrc/common.h31
-rwxr-xr-xsrc/lpcre.c453
-rwxr-xr-xsrc/lpcre_f.c12
-rwxr-xr-xsrc/lposix.c470
-rwxr-xr-xsrc/rex_pcre.mak2
-rwxr-xr-xsrc/rex_posix.mak2
-rwxr-xr-xsrc/scite.properties15
-rwxr-xr-xtest/common_sets.lua142
-rwxr-xr-xtest/pcre_sets.lua4
-rwxr-xr-xtest/pcre_sets2.lua28
18 files changed, 603 insertions, 1065 deletions
diff --git a/ChangeLog b/ChangeLog
index f96b877..b7adfac 100755
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,82 @@
-2007-02-12 Shmuel
+2007-03-10 Shmuel Zeigerman <shmuz@actcom.co.il>
+
+ * algo.h: many functions added.
+ * lpcre.c, lposix.c, ltre.c: many functions removed.
+
+2007-03-09 Shmuel Zeigerman <shmuz@actcom.co.il>
+
+ * algo.h: new file added -- to contain the code of common algorithms.
+ * It is, in fact, a C-file.
+ * gsub, match, find: functions added
+
+ * lpcre.c, lposix.c, ltre.c (gsub, match, find): functions removed.
+
+2007-03-07 Shmuel Zeigerman <shmuz@actcom.co.il>
+
+ * lposix.c (REX_NSUB_BASE1): macro added, to facilitate building for
+ Tom Lord's library.
+
+2007-03-06 Shmuel Zeigerman <shmuz@actcom.co.il>
+
+ * lposix.c, ltre.c (gmatch, split, gsub): bugfixes:
+ incorrect processing of patterns anchored at the beginning.
+ Fixed as follows: if (offset > 0) eflags |= REG_NOTBOL;
+
+ * test/common_sets.lua: test cases added.
+
+2007-03-05 Shmuel Zeigerman <shmuz@actcom.co.il>
+
+ * lpcre.c (gsub): bugfix:
+ Test: { {"abcd", "\\b", "%1"}, {"abcd", 2, 2} }.
+
+ * test/pcre_sets2.lua: a test case added.
+
+2007-03-03 Shmuel Zeigerman <shmuz@actcom.co.il>
+
+ * lpcre.c, lposix.c, ltre.c (gsub): [API change]:
+ gsub returns 3 values; the 3rd is the number of replacements made.
+
+ * test/*.lua: corrected tests for gsub (after API change).
+
+2007-03-02 Shmuel Zeigerman <shmuz@actcom.co.il>
+
+ * test/common_sets.lua: added new tests for gsub (after API change).
+
+2007-03-01 Shmuel Zeigerman <shmuz@actcom.co.il>
+
+ * lpcre.c, lposix.c, ltre.c (gsub): [API change]:
+ a) 2-nd return of rep() is ignored --> API-compatible with string.gsub
+ b) argument `n' can be a function --> API extension wrt string.gsub
+
+ * common.h (REX_VERSION): updated to "2.2.0 beta".
+
+2007-02-23 Shmuel Zeigerman <shmuz@actcom.co.il>
+
+ * lpcre.c, lpcre_f.c:
+ * added #ifdef's to do named subpatterns only if PCRE_MAJOR >= 4,
+ otherwise it wouldn't compile for PCRE 3.x [bugreport by Zhao Zhiguo].
+ * same #ifdef for Lpcre_config function.
+
+2007-02-20 Shmuel Zeigerman <shmuz@actcom.co.il>
+
+ * ltre.c (aexec, atfind, have_backrefs, have_approx): new methods.
+ aexec --> exec + approximate matching;
+ atfind --> tfind + approximate matching;
+ have_backrefs --> binding of tre_have_backrefs;
+ have_approx --> binding of tre_have_approx;
+
+2007-02-18 Shmuel Zeigerman <shmuz@actcom.co.il>
+
+ * ltre.c: all uses of regexec replaced by regnexec.
+ * test/common_sets.lua: added tests with nuls in the subject.
+
+2007-02-17 Shmuel Zeigerman <shmuz@actcom.co.il>
+
+ * ltre.c: new file added (started the binding of the TRE regex library).
+ * test/posix_sets.lua: added tests with nuls in the subject or/and
+ the pattern.
+
+2007-02-12 Shmuel Zeigerman <shmuz@actcom.co.il>
* all sources (match, find, tfind, exec, dfa_exec): [API change]
in case of ordinary non-match, only a nil is returned;
@@ -12,7 +90,7 @@
* common.h (REX_VERSION): updated to "2.1.0".
-2007-01-29 Shmuel
+2007-01-29 Shmuel Zeigerman <shmuz@actcom.co.il>
* lposix.c (checkarg_find_f): [bugfix] incorrect default for eflags.
* lposix.c (generic_find): [bugfix] dereferencing uninitialized pointer.
@@ -24,34 +102,34 @@
- it was incorrectly assumed that the value on Lua stack was a string;
- luaL_error was used where lua_error was more appropriate;
-2007-01-18 Shmuel
+2007-01-18 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c (gsub): [API change undone].
* all source files: refactoring.
-2007-01-14 Shmuel
+2007-01-14 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c (gsub): [API change]:
a) 2-nd return of rep() is ignored --> API-compatible with string.gsub
b) argument `n' can be a function --> API extension wrt string.gsub
-2007-01-13 Shmuel
+2007-01-13 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c, lposix.c: refactoring.
* common.c, common.h: refactoring.
* common.h (REX_VERSION): a new #define.
* lpcre.c, lposix.c (REX_OPENLIB): using REX_VERSION.
-2007-01-12 Shmuel
+2007-01-12 Shmuel Zeigerman <shmuz@actcom.co.il>
* common.h, common.c: a nasty bug fixed.
* lpcre.c, lposix.c: version updated to 2.0.1.
-2007-01-10 Shmuel
+2007-01-10 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c, lposix.c: refactoring.
-2007-01-08 Shmuel
+2007-01-08 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c, lposix.c: refactoring.
@@ -60,18 +138,18 @@
* Add a top-level Makefile with all, clean and test targets.
* Split src/*.mak common parts into src/common.mak.
-2007-01-04 Shmuel
+2007-01-04 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c (Lpcre_gsub): unnecessary (though harmless) assignment removed.
* test/runtest.lua: extended the command-line interface.
* most files: the copyright notice changed to reference LICENSE file.
-2007-01-02 Shmuel
+2007-01-02 Shmuel Zeigerman <shmuz@actcom.co.il>
* common.c (CheckFunction, OptFunction): functions removed.
* lpcre.c (put_integer): function removed.
-2006-12-31 Shmuel
+2006-12-31 Shmuel Zeigerman <shmuz@actcom.co.il>
* common.c (udata_tostring): function removed.
* lpcre.c (Lpcre_tostring): added handling of deleted userdatum.
@@ -79,21 +157,21 @@
* lpcre.c: [API change] method `exec' now supports "named subpatterns".
* test/all_test.lua: renamed to runtest.lua.
-2006-12-30 Shmuel
+2006-12-30 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c: [API change] `versionPCRE' renamed to `version'.
-2006-12-29 Shmuel
+2006-12-29 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c: [API change] removed support of PCRE callout.
* test/pcre_sets.lua: removed testing of PCRE callout.
-2006-12-27 Shmuel
+2006-12-27 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre_f.c (config): [API change] the function accepts one optional
argument (a table), like the `flags' function.
-2006-12-26 Shmuel
+2006-12-26 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c, lposix.c (gsub): [API change] method became a function.
* test/*.lua: modified tests to reflect methods becoming functions.
@@ -104,16 +182,16 @@
* lpcre.c, lposix.c (checkarg_gsub): if the 3-rd argument is of type
"number" then it is converted to string (as in string.gsub).
-2006-12-25 Shmuel
+2006-12-25 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c, lposix.c (match, find, gmatch, split): [API change]
methods become functions.
-2006-12-23 Shmuel
+2006-12-23 Shmuel Zeigerman <shmuz@actcom.co.il>
* test/*.lua: refactoring.
-2006-12-22 Shmuel
+2006-12-22 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c, lposix.c (tfind and exec): [API change]
the return value of the underlying pcre_exec/regexec call
@@ -124,32 +202,32 @@
* lua/*.lua: deleted all lua files except for rex.lua.
* lua/rex.lua: fully rewritten; now contains wrappers for all methods.
-2006-12-21 Shmuel
+2006-12-21 Shmuel Zeigerman <shmuz@actcom.co.il>
* test/*.lua: added tests for `split' method.
* test/luatest.lua (print_results): function added.
* test/common_sets.lua: file added.
-2006-12-19 Shmuel
+2006-12-19 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.lua: `split' method added.
* lposix.lua: `split' method added.
* test/framework.lua: renamed to luatest.lua.
-2006-12-15 Shmuel
+2006-12-15 Shmuel Zeigerman <shmuz@actcom.co.il>
* test/*.lua: refactoring.
-2006-12-11 Shmuel
+2006-12-11 Shmuel Zeigerman <shmuz@actcom.co.il>
* test/*.lua: refactoring; deleting files; adding new files.
-2006-12-10 Shmuel
+2006-12-10 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c, lposix.c: gmatch bug fixed. Test case added.
* lpcre.c, lposix.c: gtfind bug fixed. Test case added.
-2006-12-09 Shmuel
+2006-12-09 Shmuel Zeigerman <shmuz@actcom.co.il>
* lua/gsub_test.lua, gsub_tstpsx.lua: 2 files deleted.
* lua/posix_sets.lua, pcre_sets.lua, framework.lua, all_test.lua:
@@ -159,27 +237,27 @@
is supplied as the 'n' parameter, then no iterations are done.
Test cases added to *.lua test files.
-2006-12-07 Shmuel
+2006-12-07 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c, lposix.c (gsub): if parameter rep is a function,
and its 2-nd return value (if present) is a string "break",
then gsub immediately returns.
* test/*.lua: refactoring.
-2006-12-06 Shmuel
+2006-12-06 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c, lposix.c: refactoring.
-2006-12-05 Shmuel
+2006-12-05 Shmuel Zeigerman <shmuz@actcom.co.il>
* lposix.c: cosmetics.
-2006-12-04 Shmuel
+2006-12-04 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c, lposix.c: introduced new macros: CAP_BEG, CAP_END and CAP_LEN.
* lpcre.c, lpcre_f.c, lposix.c: refactoring.
-2006-12-03 Shmuel
+2006-12-03 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c (Lpcre_gsub_func): an improvement.
* lposix.c (posix_gsub_func): an improvement.
@@ -188,7 +266,7 @@
* lpcre.c, lposix.c: gsub function became a method.
* test/posix_test.lua, pcre_test.lua: removed tests for removed functions.
-2006-12-02 Shmuel
+2006-12-02 Shmuel Zeigerman <shmuz@actcom.co.il>
* common.c, common.h (TBuffer): moved here from lpcre.c.
* common.c, common.h (TFreeList): helper class created.
@@ -198,18 +276,18 @@
* test/gsub_tstpsx.lua: file added.
* test/rex_.lua: file deleted.
-2006-12-01 Shmuel
+2006-12-01 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c (TBuffer): helper class created.
* lpcre.c (Lpcre_gsub_func): many changes.
-2006-11-30 Shmuel
+2006-11-30 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c (TExecData): struct renamed to TCallout.
* lpcre.c (LpcreSetExecData): function renamed to SetupCallout.
* lpcre.c (Lpcre_gsub_func): added function gsub.
-2006-11-29 Shmuel
+2006-11-29 Shmuel Zeigerman <shmuz@actcom.co.il>
* test/*.lua: refactoring.
* lpcre.c, lposix.c: refactoring.
@@ -217,23 +295,23 @@
* lpcre.c, lposix.c (oldgmatch): renamed to tgfind.
* */*.lua: renamed: oldmatch -> tfind; oldgmatch -> tgfind.
-2006-11-28 Shmuel
+2006-11-28 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c (Lpcre_dfa_exec): one Lmalloc call instead of two.
-2006-11-27 Shmuel
+2006-11-27 Shmuel Zeigerman <shmuz@actcom.co.il>
* common.c (plainfind_func): rewritten to not use memicmp
* test/*.lua: every test returns number of failures
-2006-11-26 Shmuel
+2006-11-26 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c, lposix.c: all Check_arg_* functions renamed to Checkarg_*
* lpcre.c (Lpcre_dfa_exec): added 2 arguments to dfa_exec
* common.h (DIM): macro removed
* test/pcre_test.lua: tests for dfa_exec method added
-2006-11-25 Shmuel
+2006-11-25 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c, lposix.c: alpha -> beta.
* lpcre_f.c: file added (was: part of lpcre.c).
@@ -245,48 +323,48 @@
* test/*.lua: refactoring.
* lua/generic_gsub.lua: refactoring.
-2006-11-23 Shmuel
+2006-11-23 Shmuel Zeigerman <shmuz@actcom.co.il>
* test/*.lua: refactoring.
* lua/rex.lua, lua/rex_.lua (gsub): 6th and 7th arguments swapped.
-2006-11-22 Shmuel
+2006-11-22 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c (Check_arg_findmatch_func): 5th and 6th arguments swapped.
* lpcre.c (Check_arg_gmatch_func): 4th and 5th arguments swapped.
* test/posix_test.lua: file added.
* lposix.c: 2 bugs fixed.
-2006-11-21 Shmuel
+2006-11-21 Shmuel Zeigerman <shmuz@actcom.co.il>
* test/pcre_test.lua: "named subpatterns" tests added.
-2006-11-20 Shmuel
+2006-11-20 Shmuel Zeigerman <shmuz@actcom.co.il>
* common.c, common.h (plainfind_func): function added.
* lpcre.c, lposix.c (rex.plainfind): new function (from Lua side).
* test/framework.lua: file added.
* test/pcre_test.lua: file added.
-2006-11-19 Shmuel
+2006-11-19 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c (Lpcre_config): new function (pcre.config from Lua side).
* lpcre.c: callout handling improved.
-2006-11-18 Shmuel
+2006-11-18 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c, lposix.c: old 'gmatch' method put back; renamed into 'oldgmatch'.
-2006-11-17 Shmuel
+2006-11-17 Shmuel Zeigerman <shmuz@actcom.co.il>
* windows/bcc32/make_bcc.mak: deleted -DCOMPAT51, added -D$(CMDLINE)
-2006-11-16 Shmuel
+2006-11-16 Shmuel Zeigerman <shmuz@actcom.co.il>
* gsub_test.lua (PatternLua2Pcre): function renamed into 'pat2pcre'.
* lua/pat2pcre.lua: file added (was part of gsub_test.lua).
-2006-11-15 Shmuel
+2006-11-15 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c, lposix.c: Lua API has changed:
* 'match' method renamed to 'oldmatch'
@@ -300,18 +378,18 @@
* generic_gsub.lua: rex.oldmatch is used instead of rex.match.
* test/test1.lua: file added
-2006-11-12 Shmuel
+2006-11-12 Shmuel Zeigerman <shmuz@actcom.co.il>
* rex.lua (r:gmatch): metamethod added.
* lpcre.c (Lpcre_maketables): function API simplified.
* lpcre.c (Lpcre_getargs): function removed.
* lpcre.c (LpcreGetExecParams): renamed from LpcreProcessExecParams.
-2006-11-09 Shmuel
+2006-11-09 Shmuel Zeigerman <shmuz@actcom.co.il>
* lua/bit.lua: file removed.
-2006-11-05 Shmuel
+2006-11-05 Shmuel Zeigerman <shmuz@actcom.co.il>
* rex.lua (gsub): a few structural optimizations.
* rex.lua (gmatch): a bugfix.
@@ -320,7 +398,7 @@
* new_gsub.lua: file deleted.
* lua/bit.lua: file added.
-2006-11-04 Reuben
+2006-11-04 Reuben Thomas <rrt@sc3d.org>
* rex.lua (gmatch): function rewritten to be compatible with
string.gmatch.
@@ -328,19 +406,19 @@
* lpcre.c (Lpcre_gmatch): function deleted.
* lposix.c (posix_gmatch): function deleted.
-2006-11-04 Shmuel
+2006-11-04 Shmuel Zeigerman <shmuz@actcom.co.il>
* gsub.lua, new_gsub.lua, gsub_test.lua, rex.lua: bugfixes.
-2006-10-18 Shmuel
+2006-10-18 Shmuel Zeigerman <shmuz@actcom.co.il>
* [Windows] DLL builds need lua5.1.dll rather than lua51.dll.
-2006-10-02 Shmuel
+2006-10-02 Shmuel Zeigerman <shmuz@actcom.co.il>
* Support of Lua 5.0 was dropped.
-2006-09-03 Shmuel
+2006-09-03 Shmuel Zeigerman <shmuz@actcom.co.il>
* common.h (REX_LIB_API): renamed into REX_API.
* common.h (flags_pair): renamed into flag_pair.
@@ -357,51 +435,51 @@
* added directory test/Spencer (containing test.lua).
* test/Spencer/test.lua: file returns a function.
-2006-08-27 Shmuel
+2006-08-27 Shmuel Zeigerman <shmuz@actcom.co.il>
* (local) merge with the version put into CVS by R.Thomas.
* common.h: added conditional #define's for lua_pushinteger and
lua_tointeger (needed to compile with Lua 5.0).
-2006-08-18 Shmuel
+2006-08-18 Shmuel Zeigerman <shmuz@actcom.co.il>
* common.c, lpcre.c, lposix.c: lua_pushnumber replaced with
lua_pushinteger where appropriate (in many places).
* lpcre.c (put_number): function renamed into put_integer.
-2006-06-17 Shmuel
+2006-06-17 Shmuel Zeigerman <shmuz@actcom.co.il>
* lpcre.c (TPcreExecParam): `use_callout' struct member was eliminated.
Instead, a special value of function reference (LUA_NOREF) is used.
-2006-04-01 Reuben
+2006-04-01 Reuben Thomas <rrt@sc3d.org>
* lpcre.c, common.c: Remove trailing whitespace.
* lpcre.c (Lpcre_gmatch): Remove unnecessary limit variable.
-2006-02-17 Shmuel
+2006-02-17 Shmuel Zeigerman <shmuz@actcom.co.il>
* common.c, common.h (L_lua_error): function deleted.
* common.c, lpcre.c, lposix.c: luaL_error used in place of L_lua_error.
* lposix.c (posix2): struct renamed into TPosix.
* lposix.c (posix_comp): lua_newuserdata() used instead of Lmalloc().
-2005-12-26 Shmuel
+2005-12-26 Shmuel Zeigerman <shmuz@actcom.co.il>
* Separate makefiles for POSIX and PCRE. No config file.
-2005-11-28 Shmuel
+2005-11-28 Shmuel Zeigerman <shmuz@actcom.co.il>
* common.h (REXLIB_API): macro renamed into REX_LIB_API.
* lposix.c (LREXLIB_POSIX_EXT): macro renamed into REX_POSIX_EXT.
-2005-11-26 Shmuel
+2005-11-26 Shmuel Zeigerman <shmuz@actcom.co.il>
* common.h (LUAL_REGISTER): macro renamed into REX_REGISTER.
* lpcre.c (LUAOPEN_LIB): macro renamed into REX_OPENLIB.
* lpcre.c (LIBNAME): macro renamed into REX_LIBNAME.
-2005-11-15 Shmuel
+2005-11-15 Shmuel Zeigerman <shmuz@actcom.co.il>
* common.c, common.h, lposix.c, lpcre.c: new files (lrexlib.c was
split); POSIX and PCRE parts now live in their own
@@ -417,11 +495,11 @@
have aliases (flags and new, respectively).
* lpcre.c (Lpcre_vers): function renamed into Lpcre_version.
-2005-11-12 Shmuel
+2005-11-12 Shmuel Zeigerman <shmuz@actcom.co.il>
* lrexlib.c (LUAL_REGISTER): new macro.
-2005-11-10 Shmuel
+2005-11-10 Shmuel Zeigerman <shmuz@actcom.co.il>
* lrexlib.c (posix_match_generic): return (on Lua-side) an
additional value (the return code of regexec).
@@ -430,7 +508,7 @@
* lrexlib.c (Lpcre_dfa_exec): return (on Lua-side) an additional
value (the return code of pcre_dfa_exec).
-2005-10-29 Shmuel
+2005-10-29 Shmuel Zeigerman <shmuz@actcom.co.il>
* lrexlib.c (get_flags): the function now accepts one parameter
from the Lua stack (a table). Was: no parameters. (This
@@ -445,7 +523,7 @@
* lrexlib.c: added all missing PCRE flags starting from PCRE
version 4.
-2005-10-25 Shmuel
+2005-10-25 Shmuel Zeigerman <shmuz@actcom.co.il>
* lrexlib.c (DIM): new macro.
* lrexlib.c (pcre2): renamed into TPcre.
@@ -454,7 +532,7 @@
* lrexlib.c (Lpcre_dfa_exec): new function. Lua-side: dfa_exec.
* lrexlib.c (Lpcre_dfa_restart): new function. Lua-side: dfa_restart.
-2005-10-23 Shmuel
+2005-10-23 Shmuel Zeigerman <shmuz@actcom.co.il>
* lrexlib.c (TPcreCalloutData): new struct.
* lrexlib.c (Lpcre_callout): new function.
@@ -462,11 +540,11 @@
* lrexlib.c: added new PCRE flags (from PCRE versions 5 and 6).
* lrexlib.c (put_number): new helper function.
-2005-05-27 Shmuel
+2005-05-27 Shmuel Zeigerman <shmuz@actcom.co.il>
* lrexlib.c: updated to work with Compat-5.1.
-2004-12-18 Shmuel
+2004-12-18 Shmuel Zeigerman <shmuz@actcom.co.il>
* lrexlib.c (regex_tostring): error handling added.
* lrexlib.c (regex_tostring): renamed into udata_tostring.
@@ -489,16 +567,16 @@
* lrexlib.c (Lpcre_push_substrings): added support for "named
subpatterns".
-2004-09-15 Shmuel
+2004-09-15 Shmuel Zeigerman <shmuz@actcom.co.il>
* gsub.lua (is_odd): Function removed; math.mod used instead.
-2004-08-24 Shmuel
+2004-08-24 Shmuel Zeigerman <shmuz@actcom.co.il>
* ChangeLog: File added.
* NEWS: File added.
-2004-08-12 Shmuel
+2004-08-12 Shmuel Zeigerman <shmuz@actcom.co.il>
* lrexlib.c (posix_exec, Lpcre_exec): New C functions, that
correspond to the new lua function 'r:exec'.
@@ -515,17 +593,17 @@
(posix_push_offsets, Lpcre_push_offsets): New functions.
* gsub.lua: File added.
-2004-08-11 Reuben
+2004-08-11 Reuben Thomas <rrt@sc3d.org>
* config: Many changes.
* Makefile: Many changes.
-2004-08-11 Shmuel
+2004-08-11 Shmuel Zeigerman <shmuz@actcom.co.il>
* config: File added + many changes.
* Makefile: Many changes.
-2004-07-15 Shmuel
+2004-07-15 Shmuel Zeigerman <shmuz@actcom.co.il>
* lrexlib.c (posix_get_flags, Lpcre_get_flags, Lpcre_vers): New C
functions, that correspond to new lua functions
diff --git a/LICENSE b/LICENSE
index ab51034..dbb0f7a 100755
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-License of Lrexlib release 2.1
+License of Lrexlib release 2.2
------------------------------
Copyright (C) Reuben Thomas 2000-2007
diff --git a/README b/README
index d4cbfb8..2891103 100755
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
- Lua rexlib 2.1
+ Lua rexlib 2.2
--------------
by Reuben Thomas (rrt@sc3d.org)
diff --git a/doc/index.txt b/doc/index.txt
index bdaa8d9..e399316 100755
--- a/doc/index.txt
+++ b/doc/index.txt
@@ -1,4 +1,4 @@
-Lrexlib 2.1
+Lrexlib 2.2
===========
| by Reuben Thomas (rrt@sc3d.org)
diff --git a/doc/license.html b/doc/license.html
index 8967e73..864338f 100755
--- a/doc/license.html
+++ b/doc/license.html
@@ -5,7 +5,7 @@
</head>
<body>
-<h2>Lrexlib 2.1</h2>
+<h2>Lrexlib 2.2</h2>
<p>Copyright &copy; Reuben Thomas 2000-2007<br>
Copyright &copy; Shmuel Zeigerman 2004-2007
diff --git a/doc/manual.html b/doc/manual.html
index e76a601..3fbd173 100755
--- a/doc/manual.html
+++ b/doc/manual.html
@@ -4,12 +4,12 @@
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Docutils 0.5: http://docutils.sourceforge.net/" />
-<title>Lrexlib 2.1 Reference Manual</title>
+<title>Lrexlib 2.2 Reference Manual</title>
<link rel="stylesheet" href="lrexlib.css" type="text/css" />
</head>
<body>
-<div class="document" id="lrexlib-2-1-reference-manual">
-<h1 class="title">Lrexlib 2.1 Reference Manual</h1>
+<div class="document" id="lrexlib-2-2-reference-manual">
+<h1 class="title">Lrexlib 2.2 Reference Manual</h1>
<div class="contents topic">
<p class="topic-title first"><a id="table-of-contents" name="table-of-contents">Table of Contents</a></p>
@@ -308,8 +308,8 @@ till the subject fails to match.</p>
<div class="section">
<h3><a class="toc-backref" href="#id7" id="gsub" name="gsub">gsub</a></h3>
<p><tt class="funcdef docutils literal"><span class="pre">rex.gsub</span> <span class="pre">(subj,</span> <span class="pre">patt,</span> <span class="pre">repl,</span> <span class="pre">[n],</span> <span class="pre">[cf],</span> <span class="pre">[ef],</span> <span class="pre">[lo])</span></tt></p>
-<p>The function searches for all matches of the pattern <em>patt</em> in the string <em>subj</em>
-and substitutes the found matches according to the parameter <em>repl</em> (see details
+<p>This function searches for all matches of the pattern <em>patt</em> in the string
+<em>subj</em> and replaces them according to the parameters <em>repl</em> and <em>n</em> (see details
below).</p>
<p>PCRE: A locale <em>lo</em> may be specified.</p>
<blockquote>
@@ -345,8 +345,8 @@ below).</p>
</tr>
<tr><td>[n]</td>
<td>maximum number of matches to search
-for; unlimited if not supplied</td>
-<td>number</td>
+for, or control function, or nil</td>
+<td>number or function</td>
<td><tt class="docutils literal"><span class="pre">nil</span></tt></td>
</tr>
<tr><td>[cf]</td>
@@ -372,11 +372,14 @@ for; unlimited if not supplied</td>
<dd><ol class="first last arabic simple">
<li>The subject string with the substitutions made.</li>
<li>Number of matches found.</li>
+<li>Number of substitutions made.</li>
</ol>
</dd>
<dt><strong>Details:</strong></dt>
-<dd><p class="first">The parameter <em>repl</em> can be either a string, a function or a table. The
-function behaves differently depending on the <em>repl</em> type:</p>
+<dd><p class="first">The parameter <em>repl</em> can be either a string, a function or a table.
+On each match made, it is converted into a value <em>repl_out</em> that may be used
+for the replacement.</p>
+<p><em>repl_out</em> is generated differently depending on the type of <em>repl</em>:</p>
<ol class="arabic simple">
<li>If <em>repl</em> is a <em>string</em> then it is treated as a template for substitution,
where the %X occurrences in <em>repl</em> are handled in a special way, depending
@@ -398,48 +401,75 @@ string</li>
</ul>
</li>
<li>if X is any non-digit character then %X is substituted by X</li>
-<li>all parts of <em>repl</em> other than %X are copied to the output string
-verbatim.</li>
</ul>
+<p>All parts of <em>repl</em> other than %X are copied to <em>repl_out</em> verbatim.</p>
</blockquote>
<ol class="arabic simple" start="2">
-<li>If <em>repl</em> is a <em>function</em> then it gets called on each match with the
+<li>If <em>repl</em> is a <em>function</em> then it is called on each match with the
submatches passed as parameters (if there are no submatches then the entire
-match is passed as the only parameter). The substitution string is derived
-depending on the first return value of function <em>repl</em>:</li>
+match is passed as the only parameter). <em>repl_out</em> is the return value of
+the <em>repl</em> call, and is interpreted as follows:</li>
</ol>
<blockquote>
<ul class="simple">
-<li>if it is a string then it is used as a substitution for the current match.</li>
-<li>if it is either of nothing, <tt class="docutils literal"><span class="pre">nil</span></tt> or <tt class="docutils literal"><span class="pre">false</span></tt> then no substitution is
-made.</li>
-<li>values of other types generate an error.</li>
-</ul>
-<p>Though <a class="reference" href="#gsub">gsub</a> is in general consistent with the API and behavior of Lua's
-<em>string.gsub</em>, it has one extension with regards to <em>string.gsub</em> behavior:</p>
-<ul class="simple">
-<li>if function <em>repl</em> returns more than one value and its second return value
-is the literal string <em>&quot;break&quot;</em>, then <a class="reference" href="#gsub">gsub</a> stops searching for further
-matches in the subject and returns.</li>
+<li>if it is a string or a number (coerced to a string), then the replacement
+value is that string;</li>
+<li>if it is a <tt class="docutils literal"><span class="pre">nil</span></tt> or a <tt class="docutils literal"><span class="pre">false</span></tt>, then no replacement is to be done;</li>
</ul>
</blockquote>
<ol class="arabic simple" start="3">
-<li>If <em>repl</em> is a <em>table</em> then the first submatch (or the entire match if
-there are no submatches) is used as the key and the value stored in <em>repl</em>
-under that key is used for substitution depending on its type.</li>
+<li>If <em>repl</em> is a table then <em>repl_out</em> is <em>repl</em> [m1], where m1 is the first
+submatch (or the entire match if there are no submatches), following the
+same rules as for the return value of <em>repl</em> call, described in the above
+paragraph.</li>
</ol>
-<blockquote class="last">
+<p>Note: Under some circumstances, the value of <em>repl_out</em> may be ignored; see
+<a class="reference" href="#below">below</a>.</p>
+<p>gsub behaves differently depending on the type of <em>n</em>:</p>
+<ol class="last arabic simple">
+<li>If <em>n</em> is a <em>number</em> then it is treated as the maximum number of matches
+to search for (an omitted or <tt class="docutils literal"><span class="pre">nil</span></tt> value means an unlimited number of
+matches). On each match, the replacement value is the <em>repl_out</em> string
+(see above).</li>
+</ol>
+</dd>
+</dl>
+<blockquote id="below">
+<ol class="arabic" start="2">
+<li><p class="first">If <em>n</em> is a function, then it is called on each match, after <em>repl_out</em> is
+produced (so if <em>repl</em> is a function, it will be called prior to the <em>n</em>
+call).</p>
+<p><em>n</em> receives 3 arguments and returns 2 values. Its arguments are:</p>
+<blockquote>
+<ol class="arabic simple">
+<li>The start offset of the match (a number)</li>
+<li>The end offset of the match (a number)</li>
+<li><em>repl_out</em></li>
+</ol>
+</blockquote>
+<p>The type of its first return controls the replacement produced by gsub for
+the current match:</p>
+<blockquote>
<ul class="simple">
-<li>If no value is stored under the key but <em>repl</em> has a metatable with the
-<em>__index</em> field set then the correspondent metamethod will be called for
-obtaining the value.</li>
-<li>The obtained value is used for the substitution following exactly same
-rules as for the first return value of <em>repl</em> described in the above
-paragraph.</li>
+<li><tt class="docutils literal"><span class="pre">true</span></tt> -- replace/don't replace, according to <em>repl_out</em>;</li>
+<li><tt class="docutils literal"><span class="pre">nil</span></tt>/<tt class="docutils literal"><span class="pre">false</span></tt> -- don't replace;</li>
+<li>a string (or a number coerced to a string) -- replace by that string;</li>
</ul>
</blockquote>
-</dd>
-</dl>
+<p>The type of its second return controls gsub behavior after the current
+match is handled:</p>
+<blockquote>
+<ul class="simple">
+<li><tt class="docutils literal"><span class="pre">nil</span></tt>/<tt class="docutils literal"><span class="pre">false</span></tt> -- no changes: <em>n</em> will be called on the next match;</li>
+<li><tt class="docutils literal"><span class="pre">true</span></tt> -- search for an unlimited number of matches; <em>n</em> will not be
+called anymore;</li>
+<li>a number -- maximum number of matches to search for, beginning from the
+next match; <em>n</em> will not be called anymore;</li>
+</ul>
+</blockquote>
+</li>
+</ol>
+</blockquote>
</div>
<hr class="docutils" />
<div class="section">
@@ -678,7 +708,7 @@ documentation.</p>
<hr class="docutils" />
<div class="section">
<h3><a class="toc-backref" href="#id12" id="config" name="config">config</a></h3>
-<p>[PCRE only. See <em>pcre_config</em> in the <a class="reference" href="http://www.pcre.org/pcre.txt">PCRE</a> docs.]</p>
+<p>[PCRE 4.0 and later. See <em>pcre_config</em> in the <a class="reference" href="http://www.pcre.org/pcre.txt">PCRE</a> docs.]</p>
<p><tt class="funcdef docutils literal"><span class="pre">rex.config</span> <span class="pre">([tb])</span></tt></p>
<p>This function returns a table containing the values of the configuration
parameters used at PCRE library build-time. Those parameters (numbers) are
@@ -956,7 +986,7 @@ and 25 then the function returns the following: 10, { 25,20,15 }, 3.</dd>
<hr class="docutils" />
<div class="section">
<h2><a class="toc-backref" href="#id18" id="incompatibilities-with-the-previous-versions" name="incompatibilities-with-the-previous-versions">Incompatibilities with the Previous Versions</a></h2>
-<p><strong>The following changes are incompatible with Lrexlib version 1.19:</strong></p>
+<p><strong>Incompatibilities with the version 1.19:</strong></p>
<blockquote>
<ol class="arabic simple">
<li>Lua 5.1 is required</li>
@@ -970,7 +1000,7 @@ and 25 then the function returns the following: 10, { 25,20,15 }, 3.</dd>
subpatterns</em> (PCRE only)</li>
</ol>
</blockquote>
-<p><strong>The following changes are incompatible with Lrexlib version 2.0:</strong></p>
+<p><strong>Incompatibilities with the version 2.0:</strong></p>
<blockquote>
<ol class="arabic simple">
<li><a class="reference" href="#match">match</a>, <a class="reference" href="#find">find</a>, <a class="reference" href="#tfind">tfind</a>, <a class="reference" href="#exec">exec</a>, <a class="reference" href="#dfa-exec">dfa_exec</a>: only one value (a <tt class="docutils literal"><span class="pre">nil</span></tt>) is
@@ -978,11 +1008,17 @@ returned when the subject does not match the pattern. Any other failure
generates an error.</li>
</ol>
</blockquote>
+<p><strong>Incompatibilities with the version 2.1:</strong></p>
+<blockquote>
+<ol class="arabic simple">
+<li><a class="reference" href="#gsub">gsub</a>: a special &quot;break&quot; return of <em>repl</em> function is deprecated.</li>
+</ol>
+</blockquote>
</div>
</div>
<div class="footer">
<hr class="footer" />
-Generated on: 2007-02-12 18:37 UTC.
+Generated on: 2007-03-17 09:55 UTC.
</div>
</body>
diff --git a/doc/manual.txt b/doc/manual.txt
index b2e4f52..e9bca04 100755
--- a/doc/manual.txt
+++ b/doc/manual.txt
@@ -1,6 +1,6 @@
.. role:: funcdef(literal)
-Lrexlib 2.1 Reference Manual
+Lrexlib 2.2 Reference Manual
============================
.. contents:: Table of Contents
@@ -184,8 +184,8 @@ gsub
:funcdef:`rex.gsub (subj, patt, repl, [n], [cf], [ef], [lo])`
-The function searches for all matches of the pattern *patt* in the string *subj*
-and substitutes the found matches according to the parameter *repl* (see details
+This function searches for all matches of the pattern *patt* in the string
+*subj* and replaces them according to the parameters *repl* and *n* (see details
below).
PCRE: A locale *lo* may be specified.
@@ -199,8 +199,8 @@ PCRE: A locale *lo* may be specified.
+---------+-----------------------------------+-------------------------+-------------+
| repl |substitution source |string, function or table| n/a |
+---------+-----------------------------------+-------------------------+-------------+
- | [n] |maximum number of matches to search| number | ``nil`` |
- | |for; unlimited if not supplied | | |
+ | [n] |maximum number of matches to search| number or function | ``nil`` |
+ | |for, or control function, or nil | | |
+---------+-----------------------------------+-------------------------+-------------+
| [cf] |compilation flags (bitwise OR) | number | cf_ |
+---------+-----------------------------------+-------------------------+-------------+
@@ -212,10 +212,14 @@ PCRE: A locale *lo* may be specified.
**Returns:**
1. The subject string with the substitutions made.
2. Number of matches found.
+ 3. Number of substitutions made.
**Details:**
- The parameter *repl* can be either a string, a function or a table. The
- function behaves differently depending on the *repl* type:
+ The parameter *repl* can be either a string, a function or a table.
+ On each match made, it is converted into a value *repl_out* that may be used
+ for the replacement.
+
+ *repl_out* is generated differently depending on the type of *repl*:
1. If *repl* is a *string* then it is treated as a template for substitution,
where the %X occurrences in *repl* are handled in a special way, depending
@@ -235,37 +239,60 @@ PCRE: A locale *lo* may be specified.
string
* if X is any non-digit character then %X is substituted by X
- * all parts of *repl* other than %X are copied to the output string
- verbatim.
- 2. If *repl* is a *function* then it gets called on each match with the
+ All parts of *repl* other than %X are copied to *repl_out* verbatim.
+
+ 2. If *repl* is a *function* then it is called on each match with the
submatches passed as parameters (if there are no submatches then the entire
- match is passed as the only parameter). The substitution string is derived
- depending on the first return value of function *repl*:
+ match is passed as the only parameter). *repl_out* is the return value of
+ the *repl* call, and is interpreted as follows:
+
+ * if it is a string or a number (coerced to a string), then the replacement
+ value is that string;
+ * if it is ``nil`` or ``false``, then no replacement is done.
+
+ 3. If *repl* is a table then *repl_out* is *repl* [m1], where m1 is the first
+ submatch (or the entire match if there are no submatches), following the
+ same rules as for the return value of *repl* call, described in the above
+ paragraph.
+
+ Note: Under some circumstances, the value of *repl_out* may be ignored; see
+ below_.
+
+ gsub behaves differently depending on the type of *n*:
- * if it is a string then it is used as a substitution for the current match.
- * if it is either of nothing, ``nil`` or ``false`` then no substitution is
- made.
- * values of other types generate an error.
+ 1. If *n* is a *number* then it is treated as the maximum number of matches
+ to search for (an omitted or ``nil`` value means an unlimited number of
+ matches). On each match, the replacement value is the *repl_out* string
+ (see above).
- Though gsub_ is in general consistent with the API and behavior of Lua's
- *string.gsub*, it has one extension with regards to *string.gsub* behavior:
+.. _below:
- * if function *repl* returns more than one value and its second return value
- is the literal string *"break"*, then gsub_ stops searching for further
- matches in the subject and returns.
+ 2. If *n* is a function, then it is called on each match, after *repl_out* is
+ produced (so if *repl* is a function, it will be called prior to the *n*
+ call).
- 3. If *repl* is a *table* then the first submatch (or the entire match if
- there are no submatches) is used as the key and the value stored in *repl*
- under that key is used for substitution depending on its type.
+ *n* receives 3 arguments and returns 2 values. Its arguments are:
- * If no value is stored under the key but *repl* has a metatable with the
- *__index* field set then the correspondent metamethod will be called for
- obtaining the value.
+ 1. The start offset of the match (a number)
+ 2. The end offset of the match (a number)
+ 3. *repl_out*
- * The obtained value is used for the substitution following exactly same
- rules as for the first return value of *repl* described in the above
- paragraph.
+ The type of its first return value controls the replacement produced by gsub
+ for the current match:
+
+ * ``true`` -- use *repl_out*: replace if it is a string, don't replace if it is ``nil`` or ``false``;
+ * ``nil``/``false`` -- don't replace;
+ * a string (or a number coerced to a string) -- replace with that string.
+
+ Its second return value controls gsub behavior after the current
+ match is handled:
+
+ * ``nil``/``false`` -- no changes: *n* will be called on the next match;
+ * ``true`` -- search for an unlimited number of matches; *n* will not be
+ called anymore;
+ * a number -- maximum number of matches to search for, starting with the
+ next match; *n* will not be called anymore.
------------------------------------------------------------
@@ -407,7 +434,7 @@ documentation.
config
------
-[PCRE only. See *pcre_config* in the PCRE_ docs.]
+[PCRE 4.0 and later. See *pcre_config* in the PCRE_ docs.]
:funcdef:`rex.config ([tb])`
@@ -573,7 +600,7 @@ string *subj*, using a DFA matching algorithm.
Incompatibilities with the Previous Versions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-**The following changes are incompatible with Lrexlib version 1.19:**
+**Incompatibilities with the version 1.19:**
1. Lua 5.1 is required
#. Functions ``newPCRE`` and ``newPOSIX`` renamed to new_
@@ -585,9 +612,13 @@ Incompatibilities with the Previous Versions
#. Method exec_: the returned table may additionally contain *named
subpatterns* (PCRE only)
-**The following changes are incompatible with Lrexlib version 2.0:**
+**Incompatibilities with the version 2.0:**
1. match_, find_, tfind_, exec_, dfa_exec_: only one value (a ``nil``) is
returned when the subject does not match the pattern. Any other failure
generates an error.
+**Incompatibilities with the version 2.1:**
+
+ 1. gsub_: the special "break" return value of the *repl* function is deprecated.
+
diff --git a/src/common.c b/src/common.c
index 1b554ac..7b5736b 100755
--- a/src/common.c
+++ b/src/common.c
@@ -81,45 +81,15 @@ void CheckStack (lua_State *L, int extraslots)
}
int OptLimit (lua_State *L, int pos) {
- if (!lua_isnoneornil (L, pos)) {
- int a = luaL_checkint (L, pos);
+ if (lua_isnoneornil (L, pos))
+ return GSUB_UNLIMITED;
+ if (lua_isfunction (L, pos))
+ return GSUB_CONDITIONAL;
+ if (lua_isnumber (L, pos)) {
+ int a = lua_tointeger (L, pos);
return a < 0 ? 0 : a;
}
- return -1;
-}
-
-/* function plainfind (s, p, [st], [ci]) */
-int plainfind_func (lua_State *L) {
- size_t textlen, patlen;
- const char *text = luaL_checklstring (L, 1, &textlen);
- const char *pattern = luaL_checklstring (L, 2, &patlen);
- const char *from = text + get_startoffset (L, 3, textlen);
- int ci = lua_toboolean (L, 4);
- const char *end = text + textlen;
-
- for (; from + patlen <= end; ++from) {
- const char *f = from, *p = pattern;
- size_t len = patlen + 1;
- if (ci) {
- while (--len) {
- if (toupper (*f++) != toupper (*p++))
- break;
- }
- }
- else {
- while (--len) {
- if (*f++ != *p++)
- break;
- }
- }
- if (len == 0) {
- lua_pushinteger (L, from - text + 1);
- lua_pushinteger (L, from - text + patlen);
- return 2;
- }
- }
- lua_pushnil (L);
- return 1;
+ return luaL_argerror (L, pos, "number or function expected");
}
/* Classes */
@@ -184,10 +154,18 @@ void buffer_free (TBuffer *buf) {
free (buf->arr);
}
+void buffer_clear (TBuffer *buf) {
+ buf->top = 0;
+}
+
void buffer_pushresult (TBuffer *buf) {
lua_pushlstring (buf->L, buf->arr, buf->top);
}
+void buffer_addbuffer (TBuffer *trg, TBuffer *src) {
+ buffer_addlstring (trg, src->arr, src->top);
+}
+
void buffer_addlstring (TBuffer *buf, const void *src, size_t sz) {
size_t newtop = buf->top + sz;
if (newtop > buf->size) {
@@ -209,14 +187,14 @@ void buffer_addvalue (TBuffer *buf, int stackpos) {
buffer_addlstring (buf, p, len);
}
-void bufferZ_addlstring (TBuffer *buf, const void *src, size_t len) {
+static void bufferZ_addlstring (TBuffer *buf, const void *src, size_t len) {
size_t header[2] = { ID_STRING };
header[1] = len;
buffer_addlstring (buf, header, sizeof (header));
buffer_addlstring (buf, src, len);
}
-void bufferZ_addnum (TBuffer *buf, size_t num) {
+static void bufferZ_addnum (TBuffer *buf, size_t num) {
size_t header[2] = { ID_NUMBER };
header[1] = num;
buffer_addlstring (buf, header, sizeof (header));
diff --git a/src/common.h b/src/common.h
index 0ee23cd..f9abf2c 100755
--- a/src/common.h
+++ b/src/common.h
@@ -6,7 +6,7 @@
#include "lua.h"
-#define REX_VERSION "Lrexlib 2.1.0"
+#define REX_VERSION "Lrexlib 2.2.0 beta"
/* REX_API can be overridden from the command line or Makefile */
#ifndef REX_API
@@ -24,10 +24,29 @@ int get_startoffset (lua_State *L, int stackpos, size_t len);
void *Lmalloc (lua_State *L, size_t size);
void CheckStack (lua_State *L, int extraslots);
int OptLimit (lua_State *L, int pos);
-int plainfind_func (lua_State *L);
/* Classes */
+typedef struct { /* compile arguments */
+ const char * pattern;
+ size_t patlen;
+ int cflags;
+ const char * locale;
+} TArgComp;
+
+typedef struct { /* exec arguments */
+ const char * text;
+ size_t textlen;
+ int startoffset;
+ int eflags;
+ int funcpos;
+ int maxmatch;
+ int funcpos2; /* used with gsub */
+ int reptype; /* used with gsub */
+ size_t ovecsize; /* used with dfa_exec */
+ size_t wscount; /* used with dfa_exec */
+} TArgExec;
+
struct tagFreeList; /* forward declaration */
struct tagBuffer {
@@ -52,6 +71,8 @@ void freelist_free (TFreeList *fl);
void buffer_init (TBuffer *buf, size_t sz, lua_State *L, TFreeList *fl);
void buffer_free (TBuffer *buf);
+void buffer_clear (TBuffer *buf);
+void buffer_addbuffer (TBuffer *trg, TBuffer *src);
void buffer_addlstring (TBuffer *buf, const void *src, size_t sz);
void buffer_addvalue (TBuffer *buf, int stackpos);
void buffer_pushresult (TBuffer *buf);
@@ -59,4 +80,10 @@ void buffer_pushresult (TBuffer *buf);
void bufferZ_putrepstring (TBuffer *buf, int reppos, int nsub);
int bufferZ_next (TBuffer *buf, size_t *iter, size_t *len, const char **str);
+/* These are the special values for maxmatch in gsub.
+ * They all must be negative.
+ */
+#define GSUB_UNLIMITED -1
+#define GSUB_CONDITIONAL -2
+
#endif
diff --git a/src/lpcre.c b/src/lpcre.c
index 7e34b77..c313465 100755
--- a/src/lpcre.c
+++ b/src/lpcre.c
@@ -12,7 +12,7 @@
#include "common.h"
extern int Lpcre_get_flags (lua_State *L);
extern int Lpcre_config (lua_State *L);
-extern int generate_error (lua_State *L, int err_code);
+extern flag_pair pcre_error_flags[];
/* These 2 settings may be redefined from the command-line or the makefile.
* They should be kept in sync between themselves and with the target name.
@@ -24,8 +24,8 @@ extern int generate_error (lua_State *L, int err_code);
# define REX_OPENLIB luaopen_rex_pcre
#endif
-const char pcre_typename[] = REX_LIBNAME"_regex";
-const char *pcre_handle = pcre_typename;
+#define CFLAGS_DEFAULT 0
+#define EFLAGS_DEFAULT 0
#define CODE_NOMATCH PCRE_ERROR_NOMATCH
#define IS_MATCH(res) ((res) >= 0)
@@ -45,9 +45,15 @@ const char *pcre_handle = pcre_typename;
#define PUSH_END(L,ud,offs,n) lua_pushinteger(L, (offs) + SUB_END(ud,n))
#define PUSH_OFFSETS(L,ud,offs,n) (PUSH_START(L,ud,offs,n), PUSH_END(L,ud,offs,n))
-/* Data Types
- ******************************************************************************
- */
+#define BASE(st) 0
+#define PULL(st,from) (st = (from))
+#define OPTLOCALE(trg,L,pos) (trg = luaL_optstring (L, pos, NULL))
+#if PCRE_MAJOR >= 4
+# define DO_NAMED_SUBPATTERNS do_named_subpatterns
+#else
+# define DO_NAMED_SUBPATTERNS(L,ud,text) ((void)L)
+#endif
+
typedef struct {
pcre * pr;
@@ -58,94 +64,36 @@ typedef struct {
int freed;
} TPcre;
-typedef struct { /* pcre_compile arguments */
- const char * pattern;
- size_t patlen;
- int cflags;
- const char * locale;
-} TArgComp;
-
-typedef struct { /* pcre_exec arguments */
- TPcre * ud;
- const char * text;
- size_t textlen;
- int startoffset;
- int eflags;
- int funcpos;
- int maxmatch;
- int reptype; /* used with gsub */
- size_t ovecsize; /* used with dfa_exec */
- size_t wscount; /* used with dfa_exec */
-} TArgExec;
+#define TUserdata TPcre
+#include "algo.h"
+
+const char pcre_typename[] = REX_LIBNAME"_regex";
+const char *pcre_handle = pcre_typename;
/* Functions
******************************************************************************
*/
-static TPcre* check_ud (lua_State *L, int stackpos) {
- return (TPcre *)luaL_checkudata (L, stackpos, pcre_handle);
-}
-
-static void checkarg_new (lua_State *L, TArgComp *argC) {
- argC->pattern = luaL_checkstring (L, 1);
- argC->cflags = luaL_optint (L, 2, 0);
- argC->locale = luaL_optstring (L, 3, NULL);
-}
-
-/* function gsub (s, patt, f, [n], [cf], [ef], [lo]) */
-static void checkarg_gsub (lua_State *L, TArgComp *argC, TArgExec *argE) {
- argE->text = luaL_checklstring (L, 1, &argE->textlen);
- argC->pattern = luaL_checkstring (L, 2);
- lua_tostring (L, 3); /* converts number (if any) to string */
- argE->reptype = lua_type (L, 3);
- if (argE->reptype != LUA_TSTRING && argE->reptype != LUA_TTABLE &&
- argE->reptype != LUA_TFUNCTION) {
- luaL_argerror (L, 3, "must be string, table or function");
- }
- argE->funcpos = 3;
- argE->maxmatch = OptLimit (L, 4);
- argC->cflags = luaL_optint (L, 5, 0);
- argE->eflags = luaL_optint (L, 6, 0);
- argC->locale = luaL_optstring (L, 7, NULL);
-}
-
-/* method r:tfind (s, [st], [ef]) */
-/* method r:exec (s, [st], [ef]) */
-static void checkarg_tfind (lua_State *L, TArgExec *argE) {
- argE->ud = check_ud (L, 1);
- argE->text = luaL_checklstring (L, 2, &argE->textlen);
- argE->startoffset = get_startoffset (L, 3, argE->textlen);
- argE->eflags = luaL_optint (L, 4, 0);
-}
-
-/* function find (s, patt, [st], [cf], [ef], [lo]) */
-/* function match (s, patt, [st], [cf], [ef], [lo]) */
-static void checkarg_find_f (lua_State *L, TArgComp *argC, TArgExec *argE) {
- argE->text = luaL_checklstring (L, 1, &argE->textlen);
- argC->pattern = luaL_checkstring (L, 2);
- argE->startoffset = get_startoffset (L, 3, argE->textlen);
- argC->cflags = luaL_optint (L, 4, 0);
- argE->eflags = luaL_optint (L, 5, 0);
- argC->locale = luaL_optstring (L, 6, NULL);
+static int generate_error (lua_State *L, const TPcre *ud, int errcode) {
+ const char *key = get_flag_key (pcre_error_flags, errcode);
+ (void) ud;
+ if (key)
+ return luaL_error (L, "error PCRE_%s", key);
+ else
+ return luaL_error (L, "PCRE error code %d", errcode);
}
-/* function gmatch (s, patt, [cf], [ef], [lo]) */
-/* function split (s, patt, [cf], [ef], [lo]) */
-static void checkarg_gmatch_split (lua_State *L, TArgComp *argC, TArgExec *argE) {
- argE->text = luaL_checklstring (L, 1, &argE->textlen);
- argC->pattern = luaL_checkstring (L, 2);
- argC->cflags = luaL_optint (L, 3, 0);
- argE->eflags = luaL_optint (L, 4, 0);
- argC->locale = luaL_optstring (L, 5, NULL);
+static TPcre* check_ud (lua_State *L, int stackpos) {
+ return (TPcre *)luaL_checkudata (L, stackpos, pcre_handle);
}
#if PCRE_MAJOR >= 6
/* method r:dfa_exec (s, [st], [ef], [ovecsize], [wscount]) */
-static void checkarg_dfa_exec (lua_State *L, TArgExec *argE) {
- argE->ud = check_ud (L, 1);
+static void checkarg_dfa_exec (lua_State *L, TArgExec *argE, TPcre **ud) {
+ *ud = check_ud (L, 1);
argE->text = luaL_checklstring (L, 2, &argE->textlen);
argE->startoffset = get_startoffset (L, 3, argE->textlen);
- argE->eflags = luaL_optint (L, 4, 0);
+ argE->eflags = luaL_optint (L, 4, EFLAGS_DEFAULT);
argE->ovecsize = luaL_optint (L, 5, 100);
argE->wscount = luaL_optint (L, 6, 50);
}
@@ -191,49 +139,7 @@ static int compile_regex (lua_State *L, const TArgComp *argC, TPcre **pud) {
return 1;
}
-static int Lpcre_new (lua_State *L) {
- TArgComp argC;
- checkarg_new (L, &argC);
- return compile_regex (L, &argC, NULL);
-}
-
-static void push_substrings (lua_State *L, TPcre *ud, const char *text) {
- int i;
- CheckStack (L, NSUB(ud));
- for (i = 1; i <= NSUB(ud); i++) {
- PUSH_SUB_OR_FALSE (L, ud, text, i);
- }
-}
-
-static void push_substring_table (lua_State *L, TPcre *ud, const char *text) {
- int i;
- lua_newtable (L);
- for (i = 1; i <= NSUB(ud); i++) {
- PUSH_SUB_OR_FALSE (L, ud, text, i);
- lua_rawseti (L, -2, i);
- }
-}
-
-static void push_offset_table (lua_State *L, const int *offsets, int nmax) {
- int i, j, k;
- lua_newtable (L);
- for (i=1, j=1; i <= nmax; i++) {
- k = i * 2;
- if (offsets[k] >= 0) {
- lua_pushinteger (L, offsets[k] + 1);
- lua_rawseti (L, -2, j++);
- lua_pushinteger (L, offsets[k+1]);
- lua_rawseti (L, -2, j++);
- }
- else {
- lua_pushboolean (L, 0);
- lua_rawseti (L, -2, j++);
- lua_pushboolean (L, 0);
- lua_rawseti (L, -2, j++);
- }
- }
-}
-
+#if PCRE_MAJOR >= 4
/* the target table must be on lua stack top */
static void do_named_subpatterns (lua_State *L, TPcre *ud, const char *text) {
int i, namecount, name_entry_size;
@@ -256,45 +162,27 @@ static void do_named_subpatterns (lua_State *L, TPcre *ud, const char *text) {
tabptr += name_entry_size;
}
}
+#endif /* #if PCRE_MAJOR >= 4 */
-static int generic_tfind (lua_State *L, int tfind) {
- TPcre *ud;
- TArgExec argE;
- int res;
-
- checkarg_tfind (L, &argE);
- ud = argE.ud;
- res = pcre_exec (ud->pr, ud->extra, argE.text, (int)argE.textlen,
- argE.startoffset, argE.eflags, ud->match,
- (ud->ncapt + 1) * 3);
- if (IS_MATCH (res)) {
- PUSH_OFFSETS (L, ud, 0, 0);
- if (tfind)
- push_substring_table (L, ud, argE.text);
- else
- push_offset_table (L, ud->match, ud->ncapt);
- do_named_subpatterns (L, ud, argE.text);
- return 3;
- }
- else if (res == CODE_NOMATCH)
- return lua_pushnil (L), 1;
- else
- return generate_error(L, res);
+static int tfind_exec (TPcre *ud, TArgExec *argE) {
+ return pcre_exec (ud->pr, ud->extra, argE->text, (int)argE->textlen,
+ argE->startoffset, argE->eflags, ud->match, (ud->ncapt + 1) * 3);
}
#if PCRE_MAJOR >= 6
static int Lpcre_dfa_exec (lua_State *L)
{
TArgExec argE;
+ TPcre *ud;
int res;
int *buf, *ovector, *wspace;
- checkarg_dfa_exec (L, &argE);
+ checkarg_dfa_exec (L, &argE, &ud);
buf = (int*) Lmalloc (L, (argE.ovecsize + argE.wscount) * sizeof(int));
ovector = buf;
wspace = buf + argE.ovecsize;
- res = pcre_dfa_exec (argE.ud->pr, argE.ud->extra, argE.text, (int)argE.textlen,
+ res = pcre_dfa_exec (ud->pr, ud->extra, argE.text, (int)argE.textlen,
argE.startoffset, argE.eflags, ovector, argE.ovecsize, wspace, argE.wscount);
if (IS_MATCH (res) || res == PCRE_ERROR_PARTIAL) {
@@ -315,250 +203,33 @@ static int Lpcre_dfa_exec (lua_State *L)
if (res == CODE_NOMATCH)
return lua_pushnil (L), 1;
else
- return generate_error (L, res);
+ return generate_error (L, ud, res);
}
}
#endif /* #if PCRE_MAJOR >= 6 */
-static int Lpcre_tfind (lua_State *L) {
- return generic_tfind (L, 1);
-}
-
-static int Lpcre_exec (lua_State *L) {
- return generic_tfind (L, 0);
-}
-
-static int gmatch_iter (lua_State *L) {
- int res;
- size_t textlen;
- TPcre *ud = (TPcre*) lua_touserdata (L, lua_upvalueindex (1));
- const char *text = lua_tolstring (L, lua_upvalueindex (2), &textlen);
- int eflags = lua_tointeger (L, lua_upvalueindex (3));
- int startoffset = lua_tointeger (L, lua_upvalueindex (4));
-
- if (startoffset > (int)textlen)
- return 0;
- res = pcre_exec (ud->pr, ud->extra, text, textlen, startoffset, eflags,
- ud->match, (NSUB(ud) + 1) * 3);
- if (IS_MATCH (res)) {
- int incr = (SUB_LEN(ud,0) == 0) ? 1 : 0; /* prevent endless loop */
- PUSH_END (L, ud, incr, 0); /* update start offset */
- lua_replace (L, lua_upvalueindex (4));
- /* push either captures or entire match */
- if (NSUB(ud)) {
- push_substrings (L, ud, text);
- return NSUB(ud);
- }
- else {
- PUSH_SUB (L, ud, text, 0);
- return 1;
- }
- }
- else if (res == CODE_NOMATCH)
- return 0;
- else
- return generate_error(L, res);
+static int gmatch_exec (TUserdata *ud, TArgExec *argE) {
+ return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen,
+ argE->startoffset, argE->eflags, ud->match, (NSUB(ud) + 1) * 3);
}
-static int split_iter (lua_State *L) {
- int res;
- size_t textlen;
- TPcre *ud = (TPcre*) lua_touserdata (L, lua_upvalueindex (1));
- const char *text = lua_tolstring (L, lua_upvalueindex (2), &textlen);
- int eflags = lua_tointeger (L, lua_upvalueindex (3));
- int startoffset = lua_tointeger (L, lua_upvalueindex (4));
- int newoffset;
-
- if (startoffset >= (int)textlen)
- return 0;
- for (newoffset = startoffset; newoffset < (int)textlen; ++newoffset) {
- res = pcre_exec (ud->pr, ud->extra, text, textlen, newoffset, eflags,
- ud->match, (NSUB(ud) + 1) * 3);
- if (IS_MATCH (res)) {
- if (SUB_LEN(ud,0)) {
- PUSH_END (L, ud, 0, 0); /* update start offset */
- lua_replace (L, lua_upvalueindex (4));
- /* push text preceding the match */
- lua_pushlstring (L, text + startoffset, SUB_BEG(ud,0) - startoffset);
- /* push either captures or entire match */
- if (NSUB(ud)) {
- push_substrings (L, ud, text);
- return 1 + NSUB(ud);
- }
- else {
- PUSH_SUB (L, ud, text, 0);
- return 2;
- }
- }
- }
- else if (res == CODE_NOMATCH)
- break;
- else
- return generate_error (L, res);
- }
- lua_pushinteger (L, textlen); /* mark as last iteration */
- lua_replace (L, lua_upvalueindex (4)); /* update start offset */
- lua_pushlstring (L, text + startoffset, textlen - startoffset);
- return 1;
+static void gmatch_pushsubject (lua_State *L, TArgExec *argE) {
+ lua_pushlstring (L, argE->text, argE->textlen);
}
-static int generic_gmatch (lua_State *L, lua_CFunction iter)
-{
- TArgComp argC;
- TArgExec argE;
- checkarg_gmatch_split (L, &argC, &argE);
- compile_regex (L, &argC, &argE.ud); /* 1-st upvalue: ud */
- lua_pushlstring (L, argE.text, argE.textlen); /* 2-nd upvalue: s */
- lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */
- lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */
- lua_pushcclosure (L, iter, 4);
- return 1;
+static int findmatch_exec (TPcre *ud, TArgExec *argE) {
+ return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen,
+ argE->startoffset, argE->eflags, ud->match, (NSUB(ud) + 1) * 3);
}
-static int Lpcre_gmatch (lua_State *L) {
- return generic_gmatch (L, gmatch_iter);
+static int gsub_exec (TPcre *ud, TArgExec *argE, int st) {
+ return pcre_exec (ud->pr, ud->extra, argE->text, (int)argE->textlen,
+ st, argE->eflags, ud->match, (NSUB(ud) + 1) * 3);
}
-static int Lpcre_split (lua_State *L) {
- return generic_gmatch (L, split_iter);
-}
-
-static int generic_find (lua_State *L, int find) {
- int res;
- TPcre *ud;
- TArgComp argC;
- TArgExec argE;
-
- checkarg_find_f (L, &argC, &argE);
- compile_regex (L, &argC, &ud);
- res = pcre_exec (ud->pr, ud->extra, argE.text, argE.textlen, argE.startoffset,
- argE.eflags, ud->match, (NSUB(ud) + 1) * 3);
- if (IS_MATCH (res)) {
- if (find)
- PUSH_OFFSETS (L, ud, 0, 0);
- if (NSUB(ud)) /* push captures */
- push_substrings (L, ud, argE.text);
- else if (!find) {
- PUSH_SUB (L, ud, argE.text, 0);
- return 1;
- }
- return find ? NSUB(ud) + 2 : NSUB(ud);
- }
- else if (res == CODE_NOMATCH)
- return lua_pushnil (L), 1;
- else
- return generate_error (L, res);
-}
-
-static int Lpcre_find (lua_State *L) {
- return generic_find (L, 1);
-}
-
-static int Lpcre_match (lua_State *L) {
- return generic_find (L, 0);
-}
-
-static int Lpcre_gsub (lua_State *L) {
- TPcre *ud;
- TArgComp argC;
- TArgExec argE;
- int reps = 0, st = 0;
- TBuffer BufOut, BufRep;
- TFreeList freelist;
- /*--------------------------------------------------------------------------*/
- checkarg_gsub (L, &argC, &argE);
- compile_regex (L, &argC, &ud);
- freelist_init (&freelist);
- /*--------------------------------------------------------------------------*/
- if (argE.reptype == LUA_TSTRING) {
- buffer_init (&BufRep, 256, L, &freelist);
- bufferZ_putrepstring (&BufRep, argE.funcpos, NSUB(ud));
- }
- else if (argE.reptype == LUA_TFUNCTION)
- lua_pushliteral (L, "break");
- /*--------------------------------------------------------------------------*/
- buffer_init (&BufOut, 1024, L, &freelist);
- while ((argE.maxmatch < 0 || reps < argE.maxmatch) && st <= (int)argE.textlen) {
- int from, to, res;
- res = pcre_exec (ud->pr, ud->extra, argE.text, (int)argE.textlen, st,
- argE.eflags, ud->match, (NSUB(ud) + 1) * 3);
- if (res == CODE_NOMATCH)
- break;
- else if (!IS_MATCH (res)) {
- freelist_free (&freelist);
- return generate_error (L, res);
- }
- ++reps;
- from = SUB_BEG(ud,0);
- to = SUB_END(ud,0);
- if (from > st)
- buffer_addlstring (&BufOut, argE.text + st, from - st);
- /*------------------------------------------------------------------------*/
- if (argE.reptype == LUA_TSTRING) {
- size_t iter = 0, num;
- const char *str;
- while (bufferZ_next (&BufRep, &iter, &num, &str)) {
- if (str)
- buffer_addlstring (&BufOut, str, num);
- else if (num == 0 || SUB_VALID (ud,num))
- buffer_addlstring (&BufOut, argE.text + SUB_BEG(ud,num), SUB_LEN(ud,num));
- }
- }
- /*------------------------------------------------------------------------*/
- else if (argE.reptype == LUA_TTABLE) {
- if (NSUB(ud) > 0)
- PUSH_SUB_OR_FALSE (L, ud, argE.text, 1);
- else
- lua_pushlstring (L, argE.text + from, to - from);
- lua_gettable (L, argE.funcpos);
- }
- /*------------------------------------------------------------------------*/
- else if (argE.reptype == LUA_TFUNCTION) {
- int narg;
- lua_pushvalue (L, argE.funcpos);
- if (NSUB(ud) > 0) {
- push_substrings (L, ud, argE.text);
- narg = NSUB(ud);
- }
- else {
- lua_pushlstring (L, argE.text + from, to - from);
- narg = 1;
- }
- if (0 != lua_pcall (L, narg, 2, 0)) {
- freelist_free (&freelist);
- return lua_error (L); /* re-raise the error */
- }
- }
- /*------------------------------------------------------------------------*/
- if (argE.reptype != LUA_TSTRING) {
- int pos = (argE.reptype == LUA_TFUNCTION) ? -2 : -1;
- if (lua_tostring (L, pos))
- buffer_addvalue (&BufOut, pos);
- else if (!lua_toboolean (L, pos))
- buffer_addlstring (&BufOut, argE.text + from, to - from);
- else {
- freelist_free (&freelist);
- luaL_error (L, "invalid replacement value (a %s)", luaL_typename (L, pos));
- }
- if (argE.reptype == LUA_TFUNCTION && lua_equal (L, -1, -3))
- argE.maxmatch = 0; /* signal break from the loop */
- lua_pop (L, -pos);
- }
- /*------------------------------------------------------------------------*/
- if (from < to)
- st = to;
- else if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */
- buffer_addlstring (&BufOut, argE.text + st, 1);
- ++st;
- }
- else break;
- }
- /*--------------------------------------------------------------------------*/
- buffer_addlstring (&BufOut, argE.text + st, argE.textlen - st);
- buffer_pushresult (&BufOut);
- lua_pushinteger (L, reps);
- freelist_free (&freelist);
- return 2;
+static int split_exec (TPcre *ud, TArgExec *argE, int offset) {
+ return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, offset,
+ argE->eflags, ud->match, (NSUB(ud) + 1) * 3);
}
static int Lpcre_gc (lua_State *L) {
@@ -588,8 +259,8 @@ static int Lpcre_version (lua_State *L) {
}
static const luaL_reg pcremeta[] = {
- { "exec", Lpcre_exec },
- { "tfind", Lpcre_tfind }, /* old match */
+ { "exec", ud_exec },
+ { "tfind", ud_tfind }, /* old match */
#if PCRE_MAJOR >= 6
{ "dfa_exec", Lpcre_dfa_exec },
#endif
@@ -599,16 +270,18 @@ static const luaL_reg pcremeta[] = {
};
static const luaL_reg rexlib[] = {
- { "match", Lpcre_match },
- { "find", Lpcre_find },
- { "gmatch", Lpcre_gmatch },
- { "gsub", Lpcre_gsub },
- { "split", Lpcre_split },
- { "new", Lpcre_new },
+ { "match", match },
+ { "find", find },
+ { "gmatch", gmatch },
+ { "gsub", gsub },
+ { "split", split },
+ { "new", ud_new },
{ "plainfind", plainfind_func },
{ "flags", Lpcre_get_flags },
- { "config", Lpcre_config },
{ "version", Lpcre_version },
+#if PCRE_MAJOR >= 4
+ { "config", Lpcre_config },
+#endif
{ NULL, NULL }
};
diff --git a/src/lpcre_f.c b/src/lpcre_f.c
index b19aece..e474c74 100755
--- a/src/lpcre_f.c
+++ b/src/lpcre_f.c
@@ -82,7 +82,7 @@ static flag_pair pcre_flags[] = {
{ NULL, 0 }
};
-static flag_pair pcre_error_flags[] = {
+flag_pair pcre_error_flags[] = {
{ "ERROR_NOMATCH", PCRE_ERROR_NOMATCH },
{ "ERROR_NULL", PCRE_ERROR_NULL },
{ "ERROR_BADOPTION", PCRE_ERROR_BADOPTION },
@@ -147,6 +147,7 @@ int Lpcre_get_flags (lua_State *L) {
return get_flags (L, fps);
}
+#if PCRE_MAJOR >= 4
int Lpcre_config (lua_State *L) {
int val;
flag_pair *fp;
@@ -162,12 +163,5 @@ int Lpcre_config (lua_State *L) {
}
return 1;
}
-
-int generate_error (lua_State *L, int errcode) {
- const char *key = get_flag_key (pcre_error_flags, errcode);
- if (key)
- return luaL_error (L, "error PCRE_%s", key);
- else
- return luaL_error (L, "PCRE error code %d", errcode);
-}
+#endif
diff --git a/src/lposix.c b/src/lposix.c
index fba8052..b3fe8f9 100755
--- a/src/lposix.c
+++ b/src/lposix.c
@@ -42,16 +42,17 @@
# define EFLAGS_DEFAULT 0
#endif
-const char posix_typename[] = REX_LIBNAME"_regex";
-const char *posix_handle = posix_typename;
-
#define CODE_NOMATCH REG_NOMATCH
#define IS_MATCH(res) ((res) == 0)
#define SUB_BEG(ud,n) ud->match[n].rm_so
#define SUB_END(ud,n) ud->match[n].rm_eo
#define SUB_LEN(ud,n) (SUB_END(ud,n) - SUB_BEG(ud,n))
#define SUB_VALID(ud,n) (SUB_BEG(ud,n) >= 0)
-#define NSUB(ud) ((int)ud->r.re_nsub)
+#ifdef REX_NSUB_BASE1
+# define NSUB(ud) ((int)ud->r.re_nsub - 1)
+#else
+# define NSUB(ud) ((int)ud->r.re_nsub)
+#endif
#define PUSH_SUB(L,ud,text,n) \
lua_pushlstring (L, (text) + SUB_BEG(ud,n), SUB_LEN(ud,n))
@@ -63,10 +64,10 @@ const char *posix_handle = posix_typename;
#define PUSH_END(L,ud,offs,n) lua_pushinteger(L, (offs) + SUB_END(ud,n))
#define PUSH_OFFSETS(L,ud,offs,n) (PUSH_START(L,ud,offs,n), PUSH_END(L,ud,offs,n))
-
-/* Data Types
- ******************************************************************************
- */
+#define BASE(st) (st)
+#define PULL(st,from) ((void)st)
+#define OPTLOCALE(trg,L,pos) ((void)trg)
+#define DO_NAMED_SUBPATTERNS(L,ud,text) ((void)L)
typedef struct {
regex_t r;
@@ -74,22 +75,11 @@ typedef struct {
int freed;
} TPosix;
-typedef struct { /* regcomp arguments */
- const char * pattern;
- size_t patlen;
- int cflags;
-} TArgComp;
-
-typedef struct { /* regexec arguments */
- TPosix * ud;
- const char * text;
- size_t textlen;
- int startoffset;
- int eflags;
- int funcpos;
- int maxmatch;
- int reptype; /* used with gsub */
-} TArgExec;
+#define TUserdata TPosix
+#include "algo.h"
+
+const char posix_typename[] = REX_LIBNAME"_regex";
+const char *posix_handle = posix_typename;
/* Functions
******************************************************************************
@@ -99,56 +89,7 @@ static TPosix* check_ud (lua_State *L, int stackpos) {
return (TPosix *)luaL_checkudata (L, stackpos, posix_handle);
}
-static void checkarg_new (lua_State *L, TArgComp *argC) {
- argC->pattern = luaL_checklstring (L, 1, &argC->patlen);
- argC->cflags = luaL_optint (L, 2, CFLAGS_DEFAULT);
-}
-
-/* function gsub (s, patt, f, [n], [cf], [ef]) */
-static void checkarg_gsub (lua_State *L, TArgComp *argC, TArgExec *argE) {
- argE->text = luaL_checklstring (L, 1, &argE->textlen);
- argC->pattern = luaL_checklstring (L, 2, &argC->patlen);
- lua_tostring (L, 3); /* converts number (if any) to string */
- argE->reptype = lua_type (L, 3);
- if (argE->reptype != LUA_TSTRING && argE->reptype != LUA_TTABLE &&
- argE->reptype != LUA_TFUNCTION) {
- luaL_argerror (L, 3, "must be string, table or function");
- }
- argE->funcpos = 3;
- argE->maxmatch = OptLimit (L, 4);
- argC->cflags = luaL_optint (L, 5, CFLAGS_DEFAULT);
- argE->eflags = luaL_optint (L, 6, EFLAGS_DEFAULT);
-}
-
-/* method r:tfind (s, [st], [ef]) */
-/* method r:exec (s, [st], [ef]) */
-static void checkarg_tfind (lua_State *L, TArgExec *argE) {
- argE->ud = check_ud (L, 1);
- argE->text = luaL_checklstring (L, 2, &argE->textlen);
- argE->startoffset = get_startoffset (L, 3, argE->textlen);
- argE->eflags = luaL_optint (L, 4, EFLAGS_DEFAULT);
-}
-
-/* function find (s, patt, [st], [cf], [ef]) */
-/* function match (s, patt, [st], [cf], [ef]) */
-static void checkarg_find_f (lua_State *L, TArgComp *argC, TArgExec *argE) {
- argE->text = luaL_checklstring (L, 1, &argE->textlen);
- argC->pattern = luaL_checklstring (L, 2, &argC->patlen);
- argE->startoffset = get_startoffset (L, 3, argE->textlen);
- argC->cflags = luaL_optint (L, 4, CFLAGS_DEFAULT);
- argE->eflags = luaL_optint (L, 5, EFLAGS_DEFAULT);
-}
-
-/* function gmatch (s, patt, [cf], [ef]) */
-/* function split (s, patt, [cf], [ef]) */
-static void checkarg_gmatch_split (lua_State *L, TArgComp *argC, TArgExec *argE) {
- argE->text = luaL_checklstring (L, 1, &argE->textlen);
- argC->pattern = luaL_checklstring (L, 2, &argC->patlen);
- argC->cflags = luaL_optint (L, 3, CFLAGS_DEFAULT);
- argE->eflags = luaL_optint (L, 4, EFLAGS_DEFAULT);
-}
-
-int generate_error (lua_State *L, const TPosix *ud, int errcode) {
+static int generate_error (lua_State *L, const TPosix *ud, int errcode) {
char errbuf[80];
regerror (errcode, &ud->r, errbuf, sizeof (errbuf));
return luaL_error (L, "%s", errbuf);
@@ -180,358 +121,85 @@ static int compile_regex (lua_State *L, const TArgComp *argC, TPosix **pud) {
return 1;
}
-static void push_substrings (lua_State *L, TPosix *ud, const char *text) {
- int i;
- CheckStack (L, NSUB(ud));
- for (i = 1; i <= NSUB(ud); i++) {
- PUSH_SUB_OR_FALSE (L, ud, text, i);
- }
-}
-
-static void push_substring_table (lua_State *L, TPosix *ud, const char *text) {
- int i;
- lua_newtable (L);
- for (i = 1; i <= NSUB(ud); i++) {
- PUSH_SUB_OR_FALSE (L, ud, text, i);
- lua_rawseti (L, -2, i);
- }
-}
-
-static void push_offset_table (lua_State *L, TPosix *ud, int startoffset) {
- int i, j;
-
- lua_newtable (L);
- for (i=1, j=1; i <= NSUB(ud); i++) {
- if (SUB_VALID (ud,i)) {
- PUSH_START (L, ud, startoffset, i);
- lua_rawseti (L, -2, j++);
- PUSH_END (L, ud, startoffset, i);
- lua_rawseti (L, -2, j++);
- }
- else {
- lua_pushboolean (L, 0);
- lua_rawseti (L, -2, j++);
- lua_pushboolean (L, 0);
- lua_rawseti (L, -2, j++);
- }
- }
-}
-
-static void CheckStartEnd (TArgExec *argE) {
#ifdef REX_POSIX_EXT
+static void CheckStartEnd (TArgExec *argE, TPosix *ud) {
if (argE->eflags & REG_STARTEND) {
- SUB_BEG(argE->ud,0) = argE->startoffset;
- SUB_END(argE->ud,0) = argE->textlen;
+ ud->match[0].rm_so = argE->startoffset;
+ ud->match[0].rm_eo = argE->textlen;
argE->startoffset = 0;
}
else
argE->text += argE->startoffset;
+}
+#endif
+
+static int tfind_exec (TPosix *ud, TArgExec *argE) {
+#ifdef REX_POSIX_EXT
+ CheckStartEnd (argE, ud);
#else
argE->text += argE->startoffset;
#endif
+ return regexec (&ud->r, argE->text, NSUB(ud) + 1, ud->match, argE->eflags);
}
-static int generic_tfind (lua_State *L, int tfind) {
- int res;
- TArgExec argE;
- TPosix *ud;
-
- checkarg_tfind (L, &argE);
- CheckStartEnd (&argE);
- ud = argE.ud; /* avoid too many redirections */
-
- /* execute the search */
- res = regexec (&ud->r, argE.text, NSUB(ud) + 1, ud->match, argE.eflags);
- if (IS_MATCH (res)) {
- PUSH_OFFSETS (L, ud, argE.startoffset, 0);
- if (tfind)
- push_substring_table (L, ud, argE.text);
- else
- push_offset_table (L, ud, argE.startoffset);
- return 3;
- }
- else if (res == CODE_NOMATCH)
- return lua_pushnil (L), 1;
- else
- return generate_error (L, ud, res);
-}
-
-static int Posix_tfind (lua_State *L) {
- return generic_tfind (L, 1);
-}
-
-static int Posix_exec (lua_State *L) {
- return generic_tfind (L, 0);
-}
-
-static int gmatch_iter (lua_State *L) {
- int res;
- size_t textlen;
- int incr;
- TPosix *ud = (TPosix*) lua_touserdata (L, lua_upvalueindex (1));
- const char *text = lua_tolstring (L, lua_upvalueindex (2), &textlen);
- int eflags = lua_tointeger (L, lua_upvalueindex (3));
- int startoffset = lua_tointeger (L, lua_upvalueindex (4));
-
- if (startoffset > (int)textlen)
- return 0;
+static int gmatch_exec (TUserdata *ud, TArgExec *argE) {
+ if (argE->startoffset > 0)
+ argE->eflags |= REG_NOTBOL;
#ifdef REX_POSIX_EXT
- if (eflags & REG_STARTEND) {
+ if (argE->eflags & REG_STARTEND) {
SUB_BEG(ud,0) = 0;
- SUB_END(ud,0) = textlen - startoffset;
+ SUB_END(ud,0) = argE->textlen - argE->startoffset;
}
#endif
- /* execute the search */
- text += startoffset;
- res = regexec (&ud->r, text, NSUB(ud) + 1, ud->match, eflags);
- if (IS_MATCH (res)) {
- /* push either captures or entire match */
- if (NSUB(ud))
- push_substrings (L, ud, text);
- else
- PUSH_SUB (L, ud, text, 0);
- incr = (SUB_LEN(ud,0) == 0) ? 1 : 0; /* prevent endless loop */
- PUSH_END (L, ud, startoffset + incr, 0); /* update start offset */
- lua_replace (L, lua_upvalueindex (4));
- return NSUB(ud) ? NSUB(ud) : 1;
- }
- else if (res == CODE_NOMATCH)
- return 0;
- else
- return generate_error (L, ud, res);
+ argE->text += argE->startoffset;
+ return regexec (&ud->r, argE->text, NSUB(ud) + 1, ud->match, argE->eflags);
}
-static int split_iter (lua_State *L) {
- int res;
- size_t textlen;
- int newoffset;
- TPosix *ud = (TPosix*) lua_touserdata (L, lua_upvalueindex (1));
- const char *text = lua_tolstring (L, lua_upvalueindex (2), &textlen);
- int eflags = lua_tointeger (L, lua_upvalueindex (3));
- int startoffset = lua_tointeger (L, lua_upvalueindex (4));
-
- if (startoffset >= (int)textlen)
- return 0;
- for (newoffset = startoffset; newoffset < (int)textlen; ++newoffset) {
+static void gmatch_pushsubject (lua_State *L, TArgExec *argE) {
#ifdef REX_POSIX_EXT
- if (eflags & REG_STARTEND) {
- SUB_BEG(ud,0) = 0;
- SUB_END(ud,0) = textlen - newoffset;
- }
+ if (argE->eflags & REG_STARTEND)
+ lua_pushlstring (L, argE->text, argE->textlen);
+ else
+ lua_pushlstring (L, argE->text, strlen (argE->text));
+#else
+ lua_pushlstring (L, argE->text, strlen (argE->text));
#endif
-
- /* execute the search */
- res = regexec (&ud->r, text + newoffset, NSUB(ud) + 1, ud->match, eflags);
- if (IS_MATCH (res)) {
- if (SUB_LEN(ud,0)) {
- PUSH_END (L, ud, newoffset, 0); /* update start offset */
- lua_replace (L, lua_upvalueindex (4));
- /* push text preceding the match */
- lua_pushlstring (L, text + startoffset, SUB_BEG(ud,0) + newoffset - startoffset);
- /* push either captures or entire match */
- if (NSUB(ud))
- push_substrings (L, ud, text + newoffset);
- else
- PUSH_SUB (L, ud, text + newoffset, 0);
- return NSUB(ud) ? NSUB(ud)+1 : 2;
- }
- }
- else if (res == CODE_NOMATCH)
- break;
- else
- return generate_error (L, ud, res);
- }
- lua_pushinteger (L, textlen); /* mark as last iteration */
- lua_replace (L, lua_upvalueindex (4)); /* update start offset */
- lua_pushlstring (L, text + startoffset, textlen - startoffset);
- return 1;
}
-static int generic_gmatch (lua_State *L, lua_CFunction iter)
-{
- TArgComp argC;
- TArgExec argE;
- checkarg_gmatch_split (L, &argC, &argE);
- compile_regex (L, &argC, &argE.ud); /* 1-st upvalue: ud */
+static int findmatch_exec (TPosix *ud, TArgExec *argE) {
#ifdef REX_POSIX_EXT
- if (argE.eflags & REG_STARTEND)
- lua_pushlstring (L, argE.text, argE.textlen); /* 2-nd upvalue: s */
- else
- lua_pushlstring (L, argE.text, strlen (argE.text));
+ CheckStartEnd (argE, ud);
#else
- lua_pushlstring (L, argE.text, strlen (argE.text));
+ argE->text += argE->startoffset;
#endif
- lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */
- lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */
- lua_pushcclosure (L, iter, 4);
- return 1;
-}
-
-static int Posix_gmatch (lua_State *L) {
- return generic_gmatch (L, gmatch_iter);
+ return regexec (&ud->r, argE->text, NSUB(ud) + 1, ud->match, argE->eflags);
}
-static int Posix_split (lua_State *L) {
- return generic_gmatch (L, split_iter);
-}
-
-static int Posix_new (lua_State *L) {
- TArgComp argC;
- checkarg_new (L, &argC);
- return compile_regex (L, &argC, NULL);
-}
-
-static int generic_find (lua_State *L, int find) {
- int res;
- TPosix *ud;
- TArgComp argC;
- TArgExec argE;
-
- checkarg_find_f (L, &argC, &argE);
- compile_regex (L, &argC, &argE.ud);
- CheckStartEnd (&argE);
- ud = argE.ud;
-
- res = regexec (&ud->r, argE.text, NSUB(ud) + 1, ud->match, argE.eflags);
- if (IS_MATCH (res)) {
- if (find)
- PUSH_OFFSETS (L, ud, argE.startoffset, 0);
- if (NSUB(ud)) /* push captures */
- push_substrings (L, ud, argE.text);
- else if (!find) { /* push entire match */
- PUSH_SUB (L, ud, argE.text, 0);
- return 1;
- }
- return find ? NSUB(ud) + 2 : NSUB(ud);
+static int gsub_exec (TPosix *ud, TArgExec *argE, int st) {
+#ifdef REX_POSIX_EXT
+ if(argE->eflags & REG_STARTEND) {
+ SUB_BEG(ud,0) = 0;
+ SUB_END(ud,0) = argE->textlen - st;
}
- else if (res == CODE_NOMATCH)
- return lua_pushnil (L), 1;
- else
- return generate_error (L, ud, res);
-}
-
-static int Posix_find (lua_State *L) {
- return generic_find (L, 1);
-}
-
-static int Posix_match (lua_State *L) {
- return generic_find (L, 0);
+#endif
+ if (st > 0)
+ argE->eflags |= REG_NOTBOL;
+ return regexec (&ud->r, argE->text+st, NSUB(ud)+1, ud->match, argE->eflags);
}
-static int Posix_gsub (lua_State *L) {
- TPosix *ud;
- TArgComp argC;
- TArgExec argE;
- int reps = 0, st = 0;
- TBuffer BufOut, BufRep;
- TFreeList freelist;
- /*--------------------------------------------------------------------------*/
- checkarg_gsub (L, &argC, &argE);
- compile_regex (L, &argC, &ud);
- freelist_init (&freelist);
- /*--------------------------------------------------------------------------*/
- if (argE.reptype == LUA_TSTRING) {
- buffer_init (&BufRep, 256, L, &freelist);
- bufferZ_putrepstring (&BufRep, argE.funcpos, NSUB(ud));
- }
- else if (argE.reptype == LUA_TFUNCTION)
- lua_pushliteral (L, "break");
- /*--------------------------------------------------------------------------*/
- buffer_init (&BufOut, 1024, L, &freelist);
- while ((argE.maxmatch < 0 || reps < argE.maxmatch) && st <= (int)argE.textlen) {
- int from, to, res;
+static int split_exec (TPosix *ud, TArgExec *argE, int offset) {
#ifdef REX_POSIX_EXT
- if(argE.eflags & REG_STARTEND) {
- SUB_BEG(ud,0) = 0;
- SUB_END(ud,0) = argE.textlen - st;
- }
-#endif
- res = regexec (&ud->r, argE.text+st, NSUB(ud)+1, ud->match, argE.eflags);
- if (res == CODE_NOMATCH)
- break;
- else if (!IS_MATCH (res)) {
- freelist_free (&freelist);
- return generate_error (L, ud, res);
- }
- ++reps;
- from = st + SUB_BEG(ud,0);
- to = st + SUB_END(ud,0);
- if (from > st)
- buffer_addlstring (&BufOut, argE.text + st, from - st);
- /*------------------------------------------------------------------------*/
- if (argE.reptype == LUA_TSTRING) {
- size_t iter = 0, num;
- const char *str;
- while (bufferZ_next (&BufRep, &iter, &num, &str)) {
- if (str == NULL) { /* got number in variable 'num' */
- if (num == 0) /* %0 : add the entire match */
- buffer_addlstring (&BufOut, argE.text + from, to - from);
- else { /* add captured substring */
- if (SUB_VALID (ud,num))
- buffer_addlstring (&BufOut, argE.text + st + SUB_BEG(ud,num),
- SUB_LEN(ud,num));
- }
- }
- else buffer_addlstring (&BufOut, str, num);
- }
- }
- /*------------------------------------------------------------------------*/
- else if (argE.reptype == LUA_TTABLE) {
- if (NSUB(ud) > 0)
- PUSH_SUB_OR_FALSE (L, ud, argE.text + st, 1);
- else
- lua_pushlstring (L, argE.text + from, to - from);
- lua_gettable (L, argE.funcpos);
- }
- /*------------------------------------------------------------------------*/
- else if (argE.reptype == LUA_TFUNCTION) {
- int narg;
- lua_pushvalue (L, argE.funcpos);
- if (NSUB(ud) > 0) {
- push_substrings (L, ud, argE.text + st);
- narg = NSUB(ud);
- }
- else {
- lua_pushlstring (L, argE.text + from, to - from);
- narg = 1;
- }
- if (0 != lua_pcall (L, narg, 2, 0)) {
- freelist_free (&freelist);
- return lua_error (L); /* re-raise the error */
- }
- }
- /*------------------------------------------------------------------------*/
- if (argE.reptype != LUA_TSTRING) {
- int pos = (argE.reptype == LUA_TFUNCTION) ? -2 : -1;
- if (lua_tostring (L, pos))
- buffer_addvalue (&BufOut, pos);
- else if (!lua_toboolean (L, pos))
- buffer_addlstring (&BufOut, argE.text + from, to - from);
- else {
- freelist_free (&freelist);
- luaL_error (L, "invalid replacement value (a %s)", luaL_typename (L, pos));
- }
- if (argE.reptype == LUA_TFUNCTION && lua_equal (L, -1, -3))
- argE.maxmatch = 0; /* signal break from the loop */
- lua_pop (L, -pos);
- }
- /*------------------------------------------------------------------------*/
- if (from < to)
- st = to;
- else if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */
- buffer_addlstring (&BufOut, argE.text + st, 1);
- ++st;
- }
- else break;
+ if (argE->eflags & REG_STARTEND) {
+ SUB_BEG(ud,0) = 0;
+ SUB_END(ud,0) = argE->textlen - offset;
}
- /*--------------------------------------------------------------------------*/
- buffer_addlstring (&BufOut, argE.text + st, argE.textlen - st);
- buffer_pushresult (&BufOut);
- lua_pushinteger (L, reps);
- freelist_free (&freelist);
- return 2;
+#endif
+ if (offset > 0)
+ argE->eflags |= REG_NOTBOL;
+
+ return regexec (&ud->r, argE->text + offset, NSUB(ud) + 1, ud->match, argE->eflags);
}
static int Posix_gc (lua_State *L) {
@@ -601,20 +269,20 @@ static int Posix_get_flags (lua_State *L) {
}
static const luaL_reg posixmeta[] = {
- { "exec", Posix_exec },
- { "tfind", Posix_tfind }, /* old match */
+ { "exec", ud_exec },
+ { "tfind", ud_tfind }, /* old match */
{ "__gc", Posix_gc },
{ "__tostring", Posix_tostring },
{ NULL, NULL}
};
static const luaL_reg rexlib[] = {
- { "match", Posix_match },
- { "find", Posix_find },
- { "gmatch", Posix_gmatch },
- { "gsub", Posix_gsub },
- { "split", Posix_split },
- { "new", Posix_new },
+ { "match", match },
+ { "find", find },
+ { "gmatch", gmatch },
+ { "gsub", gsub },
+ { "split", split },
+ { "new", ud_new },
{ "flags", Posix_get_flags },
{ "plainfind", plainfind_func },
{ NULL, NULL }
diff --git a/src/rex_pcre.mak b/src/rex_pcre.mak
index 392eb09..83ae19a 100755
--- a/src/rex_pcre.mak
+++ b/src/rex_pcre.mak
@@ -20,7 +20,7 @@ TRG = rex_pcre
# ===========================================================================
# === END OF USER SETTINGS ===
-V = 2.1
+V = 2.2
DEFS = -DREX_OPENLIB=luaopen_$(TRG) -DREX_LIBNAME=\"$(TRG)\"
CFLAGS = $(MYCFLAGS) $(DEFS)
diff --git a/src/rex_posix.mak b/src/rex_posix.mak
index 12fc3e9..a3179bf 100755
--- a/src/rex_posix.mak
+++ b/src/rex_posix.mak
@@ -32,7 +32,7 @@ TRG = rex_posix
# ===========================================================================
# === END OF USER SETTINGS ===
-V = 2.1
+V = 2.2
DEFS = -DREX_OPENLIB=luaopen_$(TRG) -DREX_LIBNAME=\"$(TRG)\"
CFLAGS = $(MYCFLAGS) $(DEFS)
diff --git a/src/scite.properties b/src/scite.properties
index cf58e1a..35287aa 100755
--- a/src/scite.properties
+++ b/src/scite.properties
@@ -1,12 +1,21 @@
eol.mode=LF
cc=bcc32
-dir_lua=\Progr\lib\lua\lua_5.1
-dir_pcre=\Progr\lib\pcre\pcre_7.0
-dir_posix=\Progr\lib\henry_spencer
+dir_lib=\Progr\lib
+dir_lua=$(dir_lib)\lua\lua_5.1
+dir_pcre=$(dir_lib)\pcre\pcre_7.0
+dir_posix=$(dir_lib)\henry_spencer
+dir_tre=$(dir_lib)
+
+command.compile.lposix.c=cl.bat -A -I$(dir_lua) -I$(dir_posix) -c "$(FileNameExt)"
+
+command.compile.ltre.c=$(cc) -A -I$(dir_lua);$(dir_tre) -c "$(FileNameExt)"
command.compile.*.c=$(cc) -A -I$(dir_lua);$(dir_pcre);$(dir_posix) -c \
"$(FileNameExt)"
+command.compile.*.cpp=$(cc) -A -I$(dir_lua);$(dir_pcre);$(dir_posix) -c \
+ "$(FileNameExt)"
+
command.build.*.c=lrexlib.bat
command.build.*.h=lrexlib.bat
diff --git a/test/common_sets.lua b/test/common_sets.lua
index 9b55db2..8104f60 100755
--- a/test/common_sets.lua
+++ b/test/common_sets.lua
@@ -32,6 +32,8 @@ local function set_f_gmatch (lib, flg)
--{ subj patt results }
{ {("abcd"):rep(3), "(.)b.(d)"}, {{"a","d"},{"a","d"},{"a","d"}} },
{ {"abcd", ".*" }, {{"abcd",N},{"",N} } },--zero-length match
+ { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj
+ { {"abc", "^." }, {{"a",N}} },--anchored pattern
}
end
@@ -58,6 +60,8 @@ local function set_f_split (lib, flg)
{ {"a,b,", ","}, {{"a",",",N}, {"b",",",N}, } },
{ {"a,,b", ","}, {{"a",",",N}, {"",",",N}, {"b",N,N}} },
{ {"ab<78>c", "<(.)(.)>"}, {{"ab","7","8"}, {"c",N,N}, } },
+ { {"a,\0,c", ","}, {{"a",",",N},{"\0",",",N},{"c",N,N}, } },--nuls in subj
+ { {"abc", "^."}, {{"", "a",N}, {"bc",N,N}, } },--anchored pattern
}
end
@@ -141,18 +145,20 @@ local function set_f_gsub1 (lib, flg)
return {
Name = "Function gsub, set1",
Func = get_gsub (lib),
- --{ s, p, f, n, res1, res2 },
- { {subj, pat, "", 0}, {subj, 0} }, -- test "n" + empty_replace
- { {subj, pat, "", -1}, {subj, 0} }, -- test "n" + empty_replace
- { {subj, pat, "", 1}, {"cdef", 1} },
- { {subj, pat, "", 2}, {"cd", 2} },
- { {subj, pat, "", 3}, {"cd", 2} },
- { {subj, pat, "" }, {"cd", 2} },
- { {subj, pat, "#", 0}, {subj, 0} }, -- test "n" + non-empty_replace
- { {subj, pat, "#", 1}, {"#cdef", 1} },
- { {subj, pat, "#", 2}, {"#cd#", 2} },
- { {subj, pat, "#", 3}, {"#cd#", 2} },
- { {subj, pat, "#" }, {"#cd#", 2} },
+ --{ s, p, f, n, res1, res2, res3 },
+ { {subj, pat, "", 0}, {subj, 0, 0} }, -- test "n" + empty_replace
+ { {subj, pat, "", -1}, {subj, 0, 0} }, -- test "n" + empty_replace
+ { {subj, pat, "", 1}, {"cdef", 1, 1} },
+ { {subj, pat, "", 2}, {"cd", 2, 2} },
+ { {subj, pat, "", 3}, {"cd", 2, 2} },
+ { {subj, pat, "" }, {"cd", 2, 2} },
+ { {subj, pat, "#", 0}, {subj, 0, 0} }, -- test "n" + non-empty_replace
+ { {subj, pat, "#", 1}, {"#cdef", 1, 1} },
+ { {subj, pat, "#", 2}, {"#cd#", 2, 2} },
+ { {subj, pat, "#", 3}, {"#cd#", 2, 2} },
+ { {subj, pat, "#" }, {"#cd#", 2, 2} },
+ { {"a\0c", ".", "#" }, {"###", 3, 3} }, -- subj contains nuls
+ { {"abc", "^.", "#" }, {"#bc", 1, 1} }, -- anchored pattern
}
end
@@ -161,16 +167,16 @@ local function set_f_gsub2 (lib, flg)
return {
Name = "Function gsub, set2",
Func = get_gsub (lib),
- --{ s, p, f, n, res1, res2 },
- { {subj, pat, "<%1>" }, {"<a>b<c>", 2} }, -- test non-escaped chars in f
- { {subj, pat, "%<%1%>" }, {"<a>b<c>", 2} }, -- test escaped chars in f
- { {subj, pat, "" }, {"b", 2} }, -- test empty replace
- { {subj, pat, "1" }, {"1b1", 2} }, -- test odd and even %'s in f
- { {subj, pat, "%1" }, {"abc", 2} },
- { {subj, pat, "%%1" }, {"%1b%1", 2} },
- { {subj, pat, "%%%1" }, {"%ab%c", 2} },
- { {subj, pat, "%%%%1" }, {"%%1b%%1", 2} },
- { {subj, pat, "%%%%%1" }, {"%%ab%%c", 2} },
+ --{ s, p, f, n, res1, res2, res3 },
+ { {subj, pat, "<%1>" }, {"<a>b<c>", 2, 2} }, -- test non-escaped chars in f
+ { {subj, pat, "%<%1%>" }, {"<a>b<c>", 2, 2} }, -- test escaped chars in f
+ { {subj, pat, "" }, {"b", 2, 2} }, -- test empty replace
+ { {subj, pat, "1" }, {"1b1", 2, 2} }, -- test odd and even %'s in f
+ { {subj, pat, "%1" }, {"abc", 2, 2} },
+ { {subj, pat, "%%1" }, {"%1b%1", 2, 2} },
+ { {subj, pat, "%%%1" }, {"%ab%c", 2, 2} },
+ { {subj, pat, "%%%%1" }, {"%%1b%%1", 2, 2} },
+ { {subj, pat, "%%%%%1" }, {"%%ab%%c", 2, 2} },
}
end
@@ -178,11 +184,11 @@ local function set_f_gsub3 (lib, flg)
return {
Name = "Function gsub, set3",
Func = get_gsub (lib),
- --{ s, p, f, n, res1, res2 },
- { {"abc", "a", "%0" }, {"abc", 1} }, -- test (in)valid capture index
- { {"abc", "a", "%1" }, {"abc", 1} },
- { {"abc", "[ac]", "%1" }, {"abc", 2} },
- { {"abc", "(a)", "%1" }, {"abc", 1} },
+ --{ s, p, f, n, res1,res2,res3 },
+ { {"abc", "a", "%0" }, {"abc", 1, 1} }, -- test (in)valid capture index
+ { {"abc", "a", "%1" }, {"abc", 1, 1} },
+ { {"abc", "[ac]", "%1" }, {"abc", 2, 2} },
+ { {"abc", "(a)", "%1" }, {"abc", 1, 1} },
{ {"abc", "(a)", "%2" }, "invalid capture index" },
}
end
@@ -191,15 +197,15 @@ local function set_f_gsub4 (lib, flg)
return {
Name = "Function gsub, set4",
Func = get_gsub (lib),
- --{ s, p, f, n, res1, res2 },
- { {"a2c3", ".", "#" }, {"####", 4} }, -- test .
- { {"a2c3", ".+", "#" }, {"#", 1} }, -- test .+
- { {"a2c3", ".*", "#" }, {"##", 2} }, -- test .*
- { {"/* */ */", "\\/\\*(.*)\\*\\/", "#" }, {"#", 1} },
- { {"a2c3", "[0-9]", "#" }, {"a#c#", 2} }, -- test %d
- { {"a2c3", "[^0-9]", "#" }, {"#2#3", 2} }, -- test %D
- { {"a \t\nb", "[ \t\n]", "#" }, {"a###b", 3} }, -- test %s
- { {"a \t\nb", "[^ \t\n]", "#" }, {"# \t\n#", 2} }, -- test %S
+ --{ s, p, f, n, res1, res2, res3 },
+ { {"a2c3", ".", "#" }, {"####", 4, 4} }, -- test .
+ { {"a2c3", ".+", "#" }, {"#", 1, 1} }, -- test .+
+ { {"a2c3", ".*", "#" }, {"##", 2, 2} }, -- test .*
+ { {"/* */ */", "\\/\\*(.*)\\*\\/", "#" }, {"#", 1, 1} },
+ { {"a2c3", "[0-9]", "#" }, {"a#c#", 2, 2} }, -- test %d
+ { {"a2c3", "[^0-9]", "#" }, {"#2#3", 2, 2} }, -- test %D
+ { {"a \t\nb", "[ \t\n]", "#" }, {"a###b", 3, 3} }, -- test %s
+ { {"a \t\nb", "[^ \t\n]", "#" }, {"# \t\n#", 2, 2} }, -- test %S
}
end
@@ -214,17 +220,17 @@ local function set_f_gsub5 (lib, flg)
return {
Name = "Function gsub, set5",
Func = get_gsub (lib),
- --{ s, p, f, n, res1, res2 },
- { {subj, "a(.)c(.)", frep1 }, {subj, 1} },
- { {subj, "a(.)c(.)", frep2 }, {"#", 1} },
- { {subj, "a(.)c(.)", frep3 }, {"2,3", 1} },
- { {subj, "a.c.", frep3 }, {subj, 1} },
- { {subj, "z*", frep1 }, {subj, 5} },
- { {subj, "z*", frep2 }, {"#a#2#c#3#", 5} },
- { {subj, "z*", frep3 }, {subj, 5} },
+ --{ s, p, f, n, res1, res2, res3 },
+ { {subj, "a(.)c(.)", frep1 }, {subj, 1, 0} },
+ { {subj, "a(.)c(.)", frep2 }, {"#", 1, 1} },
+ { {subj, "a(.)c(.)", frep3 }, {"2,3", 1, 1} },
+ { {subj, "a.c.", frep3 }, {subj, 1, 1} },
+ { {subj, "z*", frep1 }, {subj, 5, 0} },
+ { {subj, "z*", frep2 }, {"#a#2#c#3#", 5, 5} },
+ { {subj, "z*", frep3 }, {subj, 5, 5} },
{ {subj, subj, frep4 }, "invalid return type" },
- { {"abc",".", frep5 }, {"777", 3} },
- { {"abc",".", frep6 }, {"7bc", 1} },
+ { {"abc",".", frep5 }, {"777", 3, 3} },
+ { {"abc",".", frep6 }, {"777", 3, 3} },
}
end
@@ -234,13 +240,44 @@ local function set_f_gsub6 (lib, flg)
return {
Name = "Function gsub, set6",
Func = get_gsub (lib),
- --{ s, p, f, n, res1, res2 },
- { {subj, "a(.)c(.)", tab1 }, {subj, 1} },
- { {subj, "a(.)c(.)", tab2 }, {"56", 1} },
+ --{ s, p, f, n, res1,res2,res3 },
+ { {subj, "a(.)c(.)", tab1 }, {subj, 1, 0} },
+ { {subj, "a(.)c(.)", tab2 }, {"56", 1, 1} },
{ {subj, "a(.)c(.)", tab3 }, "invalid replacement type" },
- { {subj, "a.c.", tab1 }, {subj, 1} },
- { {subj, "a.c.", tab2 }, {subj, 1} },
- { {subj, "a.c.", tab3 }, {subj, 1} },
+ { {subj, "a.c.", tab1 }, {subj, 1, 0} },
+ { {subj, "a.c.", tab2 }, {subj, 1, 0} },
+ { {subj, "a.c.", tab3 }, {subj, 1, 0} },
+ }
+end
+
+local function set_f_gsub8 (lib, flg)
+ local subj, patt, repl = "abcdef", "..", "*"
+ return {
+ Name = "Function gsub, set8",
+ Func = get_gsub (lib),
+ --{ s, p, f, n, res1, res2, res3 },
+ { {subj, patt, repl, function() end }, {"abcdef", 3, 0} },
+ { {subj, patt, repl, function() return nil end }, {"abcdef", 3, 0} },
+ { {subj, patt, repl, function() return false end }, {"abcdef", 3, 0} },
+ { {subj, patt, repl, function() return true end }, {"***", 3, 3} },
+ { {subj, patt, repl, function() return {} end }, {"***", 3, 3} },
+ { {subj, patt, repl, function() return "#" end }, {"###", 3, 3} },
+ { {subj, patt, repl, function() return 57 end }, {"575757", 3, 3} },
+ { {subj, patt, repl, function (from) return from end }, {"135", 3, 3} },
+ { {subj, patt, repl, function (from, to) return to end }, {"246", 3, 3} },
+ { {subj, patt, repl, function (from,to,rep) return rep end },
+ {"***", 3, 3} },
+ { {subj, patt, repl, function (from, to, rep) return rep..to..from end },
+ {"*21*43*65", 3, 3} },
+ { {subj, patt, repl, function() return nil end }, {"abcdef", 3, 0} },
+ { {subj, patt, repl, function() return nil, nil end }, {"abcdef", 3, 0} },
+ { {subj, patt, repl, function() return nil, false end }, {"abcdef", 3, 0} },
+ { {subj, patt, repl, function() return nil, true end }, {"ab**", 3, 2} },
+ { {subj, patt, repl, function() return true, true end }, {"***", 3, 3} },
+ { {subj, patt, repl, function() return nil, 0 end }, {"abcdef", 1, 0} },
+ { {subj, patt, repl, function() return true, 0 end }, {"*cdef", 1, 1} },
+ { {subj, patt, repl, function() return nil, 1 end }, {"ab*ef", 2, 1} },
+ { {subj, patt, repl, function() return true, 1 end }, {"**ef", 2, 2} },
}
end
@@ -259,6 +296,7 @@ return function (libname)
set_f_gsub4 (lib),
set_f_gsub5 (lib),
set_f_gsub6 (lib),
+ set_f_gsub8 (lib),
set_f_plainfind (lib),
}
end
diff --git a/test/pcre_sets.lua b/test/pcre_sets.lua
index 865a10f..1897398 100755
--- a/test/pcre_sets.lua
+++ b/test/pcre_sets.lua
@@ -107,8 +107,10 @@ return function (libname)
set_f_find (lib, flags),
set_m_exec (lib, flags),
set_m_tfind (lib, flags),
- set_named_subpatterns (lib, flags),
}
+ if flags.MAJOR >= 4 then
+ table.insert (sets, set_named_subpatterns (lib, flags))
+ end
if flags.MAJOR >= 6 then
table.insert (sets, set_m_dfa_exec (lib, flags))
end
diff --git a/test/pcre_sets2.lua b/test/pcre_sets2.lua
index 7616540..8bdb3a3 100755
--- a/test/pcre_sets2.lua
+++ b/test/pcre_sets2.lua
@@ -13,15 +13,16 @@ local function set_f_gsub4 (lib, flg)
local set = {
Name = "Function gsub, set4",
Func = get_gsub (lib),
- --{ s, p, f, n, res1, res2 },
- { {"/* */ */", "%/%*(.*)%*%/", "#" }, {"#", 1} },
- { {"a2c3", ".-", "#" }, {"#a#2#c#3#", 5} }, -- test .-
- { {"/**/", "%/%*(.-)%*%/", "#" }, {"#", 1} },
- { {"/* */ */", "%/%*(.-)%*%/", "#" }, {"# */", 1} },
- { {"a2c3", "%d", "#" }, {"a#c#", 2} }, -- test %d
- { {"a2c3", "%D", "#" }, {"#2#3", 2} }, -- test %D
- { {"a \t\nb", "%s", "#" }, {"a###b", 3} }, -- test %s
- { {"a \t\nb", "%S", "#" }, {"# \t\n#", 2} }, -- test %S
+ --{ s, p, f, n, res1, res2, res3 },
+ { {"/* */ */", "%/%*(.*)%*%/", "#" }, {"#", 1, 1} },
+ { {"a2c3", ".-", "#" }, {"#a#2#c#3#", 5, 5} }, -- test .-
+ { {"/**/", "%/%*(.-)%*%/", "#" }, {"#", 1, 1} },
+ { {"/* */ */", "%/%*(.-)%*%/", "#" }, {"# */", 1, 1} },
+ { {"a2c3", "%d", "#" }, {"a#c#", 2, 2} }, -- test %d
+ { {"a2c3", "%D", "#" }, {"#2#3", 2, 2} }, -- test %D
+ { {"a \t\nb", "%s", "#" }, {"a###b", 3, 3} }, -- test %s
+ { {"a \t\nb", "%S", "#" }, {"# \t\n#", 2, 2} }, -- test %S
+ { {"abcd", "\\b", "%1"}, {"abcd", 2, 2} },
}
-- convert patterns: lua -> pcre
for _, test in ipairs (set) do
@@ -157,7 +158,7 @@ local function set_f_gsub7 (lib, flg)
-- fill in reference results
for _,v in ipairs(set) do
local r0, r1, r2 = pcall (string.gsub, unpack (v[1]))
- v[2] = r0 and { r1, r2 } or { r0, r1 }
+ v[2] = r0 and { r1, r2, r2 } or { r0, r1 }
end
-- convert patterns: lua -> pcre
for _, test in ipairs (set) do
@@ -169,9 +170,12 @@ end
return function (libname)
local lib = require (libname)
local flags = lib.flags and lib.flags ()
- return {
+ local sets = {
set_f_gsub4 (lib, flags),
- set_f_gsub7 (lib, flags),
}
+ if flags.MAJOR*100 + flags.MINOR > 405 then
+ table.insert (sets, set_f_gsub7 (lib, flags))
+ end
+ return sets
end