diff options
author | shmuz <shmuz> | 2007-03-17 11:09:32 +0000 |
---|---|---|
committer | shmuz <shmuz> | 2007-03-17 11:09:32 +0000 |
commit | ce25d628c034b51c8d03e890e876a89601960e03 (patch) | |
tree | 98a7f155c4f9b2c8307b1058530ff90d4158282a | |
parent | 1c42e04250bfdf6cbd6e91cdc68d7ba999a7b650 (diff) | |
download | lrexlib-ce25d628c034b51c8d03e890e876a89601960e03.tar.gz |
many changes: see ChangeLog
-rwxr-xr-x | ChangeLog | 230 | ||||
-rwxr-xr-x | LICENSE | 2 | ||||
-rwxr-xr-x | README | 2 | ||||
-rwxr-xr-x | doc/index.txt | 2 | ||||
-rwxr-xr-x | doc/license.html | 2 | ||||
-rwxr-xr-x | doc/manual.html | 118 | ||||
-rwxr-xr-x | doc/manual.txt | 97 | ||||
-rwxr-xr-x | src/common.c | 56 | ||||
-rwxr-xr-x | src/common.h | 31 | ||||
-rwxr-xr-x | src/lpcre.c | 453 | ||||
-rwxr-xr-x | src/lpcre_f.c | 12 | ||||
-rwxr-xr-x | src/lposix.c | 470 | ||||
-rwxr-xr-x | src/rex_pcre.mak | 2 | ||||
-rwxr-xr-x | src/rex_posix.mak | 2 | ||||
-rwxr-xr-x | src/scite.properties | 15 | ||||
-rwxr-xr-x | test/common_sets.lua | 142 | ||||
-rwxr-xr-x | test/pcre_sets.lua | 4 | ||||
-rwxr-xr-x | test/pcre_sets2.lua | 28 |
18 files changed, 603 insertions, 1065 deletions
@@ -1,4 +1,82 @@ -2007-02-12 Shmuel +2007-03-10 Shmuel Zeigerman <shmuz@actcom.co.il> + + * algo.h: many functions added. + * lpcre.c, lposix.c, ltre.c: many functions removed. + +2007-03-09 Shmuel Zeigerman <shmuz@actcom.co.il> + + * algo.h: new file added -- to contain the code of common algorithms. + * It is, in fact, a C-file. + * gsub, match, find: functions added + + * lpcre.c, lposix.c, ltre.c (gsub, match, find): functions removed. + +2007-03-07 Shmuel Zeigerman <shmuz@actcom.co.il> + + * lposix.c (REX_NSUB_BASE1): macro added, to facilitate building for + Tom Lord's library. + +2007-03-06 Shmuel Zeigerman <shmuz@actcom.co.il> + + * lposix.c, ltre.c (gmatch, split, gsub): bugfixes: + incorrect processing of patterns anchored at the beginning. + Was cured that way: if (offset > 0) eflags |= REG_NOTBOL; + + * test/common_sets.lua: test cases added. + +2007-03-05 Shmuel Zeigerman <shmuz@actcom.co.il> + + * lpcre.c (gsub): bugfix: + Test: { {"abcd", "\\b", "%1"}, {"abcd", 2, 2} }. + + * test/pcre_sets2.lua: a test case added. + +2007-03-03 Shmuel Zeigerman <shmuz@actcom.co.il> + + * lpcre.c, lposix.c, ltre.c (gsub): [API change]: + gsub returns 3 values; the 3-rd is number of replacements made. + + * test/*.lua: corrected tests for gsub (after API change). + +2007-03-02 Shmuel Zeigerman <shmuz@actcom.co.il> + + * test/common_sets.lua: added new tests for gsub (after API change). + +2007-03-01 Shmuel Zeigerman <shmuz@actcom.co.il> + + * lpcre.c, lposix.c, ltre.c (gsub): [API change]: + a) 2-nd return of rep() is ignored --> API-compatible with string.gsub + b) argument `n' can be a function --> API extension wrt string.gsub + + * common.h (REX_VERSION): updated to "2.2.0 beta". + +2007-02-23 Shmuel Zeigerman <shmuz@actcom.co.il> + + * lpcre.c, lpcre_f.c: + * added #ifdef's to do named subpatterns only if PCRE_MAJOR >= 4, + otherwise it wouldn't compile for PCRE 3.x [bugreport by Zhao Zhiguo]. + * same #ifdef for Lpcre_config function. 
+ +2007-02-20 Shmuel Zeigerman <shmuz@actcom.co.il> + + * ltre.c (aexec, atfind, have_backrefs, have_approx): new methods. + aexec --> exec + approximate matching; + atfind --> tfind + approximate matching; + have_backrefs --> binding of tre_have_backrefs; + have_approx --> binding of tre_have_approx; + +2007-02-18 Shmuel Zeigerman <shmuz@actcom.co.il> + + * ltre.c: all uses of regexec replaced by regnexec. + * test/common_sets.lua: added tests with nuls in the subject. + +2007-02-17 Shmuel Zeigerman <shmuz@actcom.co.il> + + * ltre.c: new file added (started the binding of the TRE regex library). + * test/posix_sets.lua: added tests with nuls in the subject or/and + the pattern. + +2007-02-12 Shmuel Zeigerman <shmuz@actcom.co.il> * all sources (match, find, tfind, exec, dfa_exec): [API change] in case of ordinary non-match, only a nil is returned; @@ -12,7 +90,7 @@ * common.h (REX_VERSION): updated to "2.1.0". -2007-01-29 Shmuel +2007-01-29 Shmuel Zeigerman <shmuz@actcom.co.il> * lposix.c (checkarg_find_f): [bugfix] incorrect default for eflags. * lposix.c (generic_find): [bugfix] dereferencing uninitialized pointer. @@ -24,34 +102,34 @@ - it was incorrectly assumed that the value on Lua stack was a string; - luaL_error was used where lua_error was more appropriate; -2007-01-18 Shmuel +2007-01-18 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c (gsub): [API change undone]. * all source files: refactoring. -2007-01-14 Shmuel +2007-01-14 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c (gsub): [API change]: a) 2-nd return of rep() is ignored --> API-compatible with string.gsub b) argument `n' can be a function --> API extension wrt string.gsub -2007-01-13 Shmuel +2007-01-13 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c, lposix.c: refactoring. * common.c, common.h: refactoring. * common.h (REX_VERSION): a new #define. * lpcre.c, lposix.c (REX_OPENLIB): using REX_VERSION. 
-2007-01-12 Shmuel +2007-01-12 Shmuel Zeigerman <shmuz@actcom.co.il> * common.h, common.c: a nasty bug fixed. * lpcre.c, lposix.c: version updated to 2.0.1. -2007-01-10 Shmuel +2007-01-10 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c, lposix.c: refactoring. -2007-01-08 Shmuel +2007-01-08 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c, lposix.c: refactoring. @@ -60,18 +138,18 @@ * Add a top-level Makefile with all, clean and test targets. * Split src/*.mak common parts into src/common.mak. -2007-01-04 Shmuel +2007-01-04 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c (Lpcre_gsub): unnecessary (though harmless) assignment removed. * test/runtest.lua: extended the command-line interface. * most files: the copyright notice changed to reference LICENSE file. -2007-01-02 Shmuel +2007-01-02 Shmuel Zeigerman <shmuz@actcom.co.il> * common.c (CheckFunction, OptFunction): functions removed. * lpcre.c (put_integer): function removed. -2006-12-31 Shmuel +2006-12-31 Shmuel Zeigerman <shmuz@actcom.co.il> * common.c (udata_tostring): function removed. * lpcre.c (Lpcre_tostring): added handling of deleted userdatum. @@ -79,21 +157,21 @@ * lpcre.c: [API change] method `exec' now supports "named subpatterns". * test/all_test.lua: renamed to runtest.lua. -2006-12-30 Shmuel +2006-12-30 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c: [API change] `versionPCRE' renamed to `version'. -2006-12-29 Shmuel +2006-12-29 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c: [API change] removed support of PCRE callout. * test/pcre_sets.lua: removed testing of PCRE callout. -2006-12-27 Shmuel +2006-12-27 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre_f.c (config): [API change] the function accepts one optional argument (a table), like the `flags' function. -2006-12-26 Shmuel +2006-12-26 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c, lposix.c (gsub): [API change] method become function. * test/*.lua: modifying tests to reflect methods become functions. 
@@ -104,16 +182,16 @@ * lpcre.c, lposix.c (checkarg_gsub): if the 3-rd argument is of type "number" then it is converted to string (as in string.gsub). -2006-12-25 Shmuel +2006-12-25 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c, lposix.c (match, find, gmatch, split): [API change] methods become functions. -2006-12-23 Shmuel +2006-12-23 Shmuel Zeigerman <shmuz@actcom.co.il> * test/*.lua: refactoring. -2006-12-22 Shmuel +2006-12-22 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c, lposix.c (tfind and exec): [API change] the return value of the underlying pcre_exec/regexec call @@ -124,32 +202,32 @@ * lua/*.lua: deleted all lua files except for rex.lua. * lua/rex.lua: fully rewritten; now contains wrappers for all methods. -2006-12-21 Shmuel +2006-12-21 Shmuel Zeigerman <shmuz@actcom.co.il> * test/*.lua: added tests for `split' method. * test/luatest.lua (print_results): function added. * test/common_sets.lua: file added. -2006-12-19 Shmuel +2006-12-19 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.lua: `split' method added. * lposix.lua: `split' method added. * test/framework.lua: renamed to luatest.lua. -2006-12-15 Shmuel +2006-12-15 Shmuel Zeigerman <shmuz@actcom.co.il> * test/*.lua: refactoring. -2006-12-11 Shmuel +2006-12-11 Shmuel Zeigerman <shmuz@actcom.co.il> * test/*.lua: refactoring; deleting files; adding new files. -2006-12-10 Shmuel +2006-12-10 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c, lposix.c: gmatch bug fixed. Test case added. * lpcre.c, lposix.c: gtfind bug fixed. Test case added. -2006-12-09 Shmuel +2006-12-09 Shmuel Zeigerman <shmuz@actcom.co.il> * lua/gsub_test.lua, gsub_tstpsx.lua: 2 files deleted. * lua/posix_sets.lua, pcre_sets.lua, framework.lua, all_test.lua: @@ -159,27 +237,27 @@ is supplied as the 'n' parameter, then no iterations are done. Test cases added to *.lua test files. 
-2006-12-07 Shmuel +2006-12-07 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c, lposix.c (gsub): if parameter rep is a function, and its 2-nd return value (if present) is a string "break", then gsub immediately returns. * test/*.lua: refactoring. -2006-12-06 Shmuel +2006-12-06 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c, lposix.c: refactoring. -2006-12-05 Shmuel +2006-12-05 Shmuel Zeigerman <shmuz@actcom.co.il> * lposix.c: cosmetics. -2006-12-04 Shmuel +2006-12-04 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c, lposix.c: introduced new macros: CAP_BEG, CAP_END and CAP_LEN. * lpcre.c, lpcre_f.c, lposix.c: refactoring. -2006-12-03 Shmuel +2006-12-03 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c (Lpcre_gsub_func): an improvement. * lposix.c (posix_gsub_func): an improvement. @@ -188,7 +266,7 @@ * lpcre.c, lposix.c: gsub function become method. * test/posix_test.lua, pcre_test.lua: removed tests for removed functions. -2006-12-02 Shmuel +2006-12-02 Shmuel Zeigerman <shmuz@actcom.co.il> * common.c, common.h (TBuffer): moved here from lpcre.c. * common.c, common.h (TFreeList): helper class created. @@ -198,18 +276,18 @@ * test/gsub_tstpsx.lua: file added. * test/rex_.lua: file deleted. -2006-12-01 Shmuel +2006-12-01 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c (TBuffer): helper class created. * lpcre.c (Lpcre_gsub_func): many changes. -2006-11-30 Shmuel +2006-11-30 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c (TExecData): struct renamed to TCallout. * lpcre.c (LpcreSetExecData): function renamed to SetupCallout. * lpcre.c (Lpcre_gsub_func): added function gsub. -2006-11-29 Shmuel +2006-11-29 Shmuel Zeigerman <shmuz@actcom.co.il> * test/*.lua: refactoring. * lpcre.c, lposix.c: refactoring. @@ -217,23 +295,23 @@ * lpcre.c, lposix.c (oldgmatch): renamed to tgfind. * */*.lua: renamed: oldmatch -> tfind; oldgmatch -> tgfind. -2006-11-28 Shmuel +2006-11-28 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c (Lpcre_dfa_exec): one Lmalloc call instead of two. 
-2006-11-27 Shmuel +2006-11-27 Shmuel Zeigerman <shmuz@actcom.co.il> * common.c (plainfind_func): rewritten to not use memicmp * test/*.lua: every test returns number of failures -2006-11-26 Shmuel +2006-11-26 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c, lposix.c: all Check_arg_* functions renamed to Checkarg_* * lpcre.c (Lpcre_dfa_exec): added 2 arguments to dfa_exec * common.h (DIM): macro removed * test/pcre_test.lua: tests for dfa_exec method added -2006-11-25 Shmuel +2006-11-25 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c, lposix.c: alpha -> beta. * lpcre_f.c: file added (was: part of lpcre.c). @@ -245,48 +323,48 @@ * test/*.lua: refactoring. * lua/generic_gsub.lua: refactoring. -2006-11-23 Shmuel +2006-11-23 Shmuel Zeigerman <shmuz@actcom.co.il> * test/*.lua: refactoring. * lua/rex.lua, lua/rex_.lua (gsub): 6th and 7th arguments swapped. -2006-11-22 Shmuel +2006-11-22 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c (Check_arg_findmatch_func): 5th and 6th arguments swapped. * lpcre.c (Check_arg_gmatch_func): 4th and 5th arguments swapped. * test/posix_test.lua: file added. * lposix.c: 2 bugs fixed. -2006-11-21 Shmuel +2006-11-21 Shmuel Zeigerman <shmuz@actcom.co.il> * test/pcre_test.lua: "named subpatterns" tests added. -2006-11-20 Shmuel +2006-11-20 Shmuel Zeigerman <shmuz@actcom.co.il> * common.c, common.h (plainfind_func): function added. * lpcre.c, lposix.c (rex.plainfind): new function (from Lua side). * test/framework.lua: file added. * test/pcre_test.lua: file added. -2006-11-19 Shmuel +2006-11-19 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c (Lpcre_config): new function (pcre.config from Lua side). * lpcre.c: callout handling improved. -2006-11-18 Shmuel +2006-11-18 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c, lposix.c: old 'gmatch' method put back; renamed into 'oldgmatch'. 
-2006-11-17 Shmuel +2006-11-17 Shmuel Zeigerman <shmuz@actcom.co.il> * windows/bcc32/make_bcc.mak: deleted -DCOMPAT51, added -D$(CMDLINE) -2006-11-16 Shmuel +2006-11-16 Shmuel Zeigerman <shmuz@actcom.co.il> * gsub_test.lua (PatternLua2Pcre): function renamed into 'pat2pcre'. * lua/pat2pcre.lua: file added (was part of gsub_test.lua). -2006-11-15 Shmuel +2006-11-15 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c, lposix.c: Lua API has changed: * 'match' method renamed to 'oldmatch' @@ -300,18 +378,18 @@ * generic_gsub.lua: rex.oldmatch is used instead of rex.match. * test/test1.lua: file added -2006-11-12 Shmuel +2006-11-12 Shmuel Zeigerman <shmuz@actcom.co.il> * rex.lua (r:gmatch): metamethod added. * lpcre.c (Lpcre_maketables): function API simplified. * lpcre.c (Lpcre_getargs): function removed. * lpcre.c (LpcreGetExecParams): renamed from LpcreProcessExecParams. -2006-11-09 Shmuel +2006-11-09 Shmuel Zeigerman <shmuz@actcom.co.il> * lua/bit.lua: file removed. -2006-11-05 Shmuel +2006-11-05 Shmuel Zeigerman <shmuz@actcom.co.il> * rex.lua (gsub): a few structural optimizations. * rex.lua (gmatch): a bugfix. @@ -320,7 +398,7 @@ * new_gsub.lua: file deleted. * lua/bit.lua: file added. -2006-11-04 Reuben +2006-11-04 Reuben Thomas <rrt@sc3d.org> * rex.lua (gmatch): function rewritten to be compatible with string.gmatch. @@ -328,19 +406,19 @@ * lpcre.c (Lpcre_gmatch): function deleted. * lposix.c (posix_gmatch): function deleted. -2006-11-04 Shmuel +2006-11-04 Shmuel Zeigerman <shmuz@actcom.co.il> * gsub.lua, new_gsub.lua, gsub_test.lua, rex.lua: bugfixes. -2006-10-18 Shmuel +2006-10-18 Shmuel Zeigerman <shmuz@actcom.co.il> * [Windows] DLL builds need lua5.1.dll rather than lua51.dll. -2006-10-02 Shmuel +2006-10-02 Shmuel Zeigerman <shmuz@actcom.co.il> * Support of Lua 5.0 was dropped. -2006-09-03 Shmuel +2006-09-03 Shmuel Zeigerman <shmuz@actcom.co.il> * common.h (REX_LIB_API): renamed into REX_API. * common.h (flags_pair): renamed into flag_pair. 
@@ -357,51 +435,51 @@ * added directory test/Spencer (containing test.lua). * test/Spencer/test.lua: file returns a function. -2006-08-27 Shmuel +2006-08-27 Shmuel Zeigerman <shmuz@actcom.co.il> * (local) merge with the version put into CVS by R.Thomas. * common.h: added conditional #define's for lua_pushinteger and lua_tointeger (needed to compile with Lua 5.0). -2006-08-18 Shmuel +2006-08-18 Shmuel Zeigerman <shmuz@actcom.co.il> * common.c, lpcre.c, lposix.c: lua_pushnumber replaced with lua_pushinteger where appropriate (in many places). * lpcre.c (put_number): function renamed into put_integer. -2006-06-17 Shmuel +2006-06-17 Shmuel Zeigerman <shmuz@actcom.co.il> * lpcre.c (TPcreExecParam): `use_callout' struct member was eliminated. Instead, a special value of function reference (LUA_NOREF) is used. -2006-04-01 Reuben +2006-04-01 Reuben Thomas <rrt@sc3d.org> * lpcre.c, common.c: Remove trailing whitespace. * lpcre.c (Lpcre_gmatch): Remove unnecessary limit variable. -2006-02-17 Shmuel +2006-02-17 Shmuel Zeigerman <shmuz@actcom.co.il> * common.c, common.h (L_lua_error): function deleted. * common.c, lpcre.c, lposix.c: luaL_error used in place of L_lua_error. * lposix.c (posix2): struct renamed into TPosix. * lposix.c (posix_comp): lua_newuserdata() used instead of Lmalloc(). -2005-12-26 Shmuel +2005-12-26 Shmuel Zeigerman <shmuz@actcom.co.il> * Separate makefiles for POSIX and PCRE. No config file. -2005-11-28 Shmuel +2005-11-28 Shmuel Zeigerman <shmuz@actcom.co.il> * common.h (REXLIB_API): macro renamed into REX_LIB_API. * lposix.c (LREXLIB_POSIX_EXT): macro renamed into REX_POSIX_EXT. -2005-11-26 Shmuel +2005-11-26 Shmuel Zeigerman <shmuz@actcom.co.il> * common.h (LUAL_REGISTER): macro renamed into REX_REGISTER. * lpcre.c (LUAOPEN_LIB): macro renamed into REX_OPENLIB. * lpcre.c (LIBNAME): macro renamed into REX_LIBNAME. 
-2005-11-15 Shmuel +2005-11-15 Shmuel Zeigerman <shmuz@actcom.co.il> * common.c, common.h, lposix.c, lpcre.c: new files (lrexlib.c was splitted); POSIX and PCRE parts now live in their own @@ -417,11 +495,11 @@ have aliases (flags and new, correspondently). * lpcre.c (Lpcre_vers): function renamed into Lpcre_version. -2005-11-12 Shmuel +2005-11-12 Shmuel Zeigerman <shmuz@actcom.co.il> * lrexlib.c (LUAL_REGISTER): new macro. -2005-11-10 Shmuel +2005-11-10 Shmuel Zeigerman <shmuz@actcom.co.il> * lrexlib.c (posix_match_generic): return (on Lua-side) an additional value (the return code of regexec). @@ -430,7 +508,7 @@ * lrexlib.c (Lpcre_dfa_exec): return (on Lua-side) an additional value (the return code of pcre_dfa_exec). -2005-10-29 Shmuel +2005-10-29 Shmuel Zeigerman <shmuz@actcom.co.il> * lrexlib.c (get_flags): the function now accepts one parameter from the Lua stack (a table). Was: no parameters. (This @@ -445,7 +523,7 @@ * lrexlib.c: added all missing PCRE flags starting from PCRE version 4. -2005-10-25 Shmuel +2005-10-25 Shmuel Zeigerman <shmuz@actcom.co.il> * lrexlib.c (DIM): new macro. * lrexlib.c (pcre2): renamed into TPcre. @@ -454,7 +532,7 @@ * lrexlib.c (Lpcre_dfa_exec): new function. Lua-side: dfa_exec. * lrexlib.c (Lpcre_dfa_restart): new function. Lua-side: dfa_restart. -2005-10-23 Shmuel +2005-10-23 Shmuel Zeigerman <shmuz@actcom.co.il> * lrexlib.c (TPcreCalloutData): new struct. * lrexlib.c (Lpcre_callout): new function. @@ -462,11 +540,11 @@ * lrexlib.c: added new PCRE flags (from PCRE versions 5 and 6). * lrexlib.c (put_number): new helper function. -2005-05-27 Shmuel +2005-05-27 Shmuel Zeigerman <shmuz@actcom.co.il> * lrexlib.c: updated to work with Compat-5.1. -2004-12-18 Shmuel +2004-12-18 Shmuel Zeigerman <shmuz@actcom.co.il> * lrexlib.c (regex_tostring): error handling added. * lrexlib.c (regex_tostring): renamed into udata_tostring. @@ -489,16 +567,16 @@ * lrexlib.c (Lpcre_push_substrings): added support for "named subpatterns". 
-2004-09-15 Shmuel +2004-09-15 Shmuel Zeigerman <shmuz@actcom.co.il> * gsub.lua (is_odd): Function removed; math.mod used instead. -2004-08-24 Shmuel +2004-08-24 Shmuel Zeigerman <shmuz@actcom.co.il> * ChangeLog: File added. * NEWS: File added. -2004-08-12 Shmuel +2004-08-12 Shmuel Zeigerman <shmuz@actcom.co.il> * lrexlib.c (posix_exec, Lpcre_exec): New C functions, that correspond to the new lua function 'r:exec'. @@ -515,17 +593,17 @@ (posix_push_offsets, Lpcre_push_offsets): New functions. * gsub.lua: File added. -2004-08-11 Reuben +2004-08-11 Reuben Thomas <rrt@sc3d.org> * config: Many changes. * Makefile: Many changes. -2004-08-11 Shmuel +2004-08-11 Shmuel Zeigerman <shmuz@actcom.co.il> * config: File added + many changes. * Makefile: Many changes. -2004-07-15 Shmuel +2004-07-15 Shmuel Zeigerman <shmuz@actcom.co.il> * lrexlib.c (posix_get_flags, Lpcre_get_flags, Lpcre_vers): New C functions, that correspond to new lua functions @@ -1,4 +1,4 @@ -License of Lrexlib release 2.1 +License of Lrexlib release 2.2 ------------------------------ Copyright (C) Reuben Thomas 2000-2007 @@ -1,4 +1,4 @@ - Lua rexlib 2.1 + Lua rexlib 2.2 -------------- by Reuben Thomas (rrt@sc3d.org) diff --git a/doc/index.txt b/doc/index.txt index bdaa8d9..e399316 100755 --- a/doc/index.txt +++ b/doc/index.txt @@ -1,4 +1,4 @@ -Lrexlib 2.1 +Lrexlib 2.2 =========== | by Reuben Thomas (rrt@sc3d.org) diff --git a/doc/license.html b/doc/license.html index 8967e73..864338f 100755 --- a/doc/license.html +++ b/doc/license.html @@ -5,7 +5,7 @@ </head> <body> -<h2>Lrexlib 2.1</h2> +<h2>Lrexlib 2.2</h2> <p>Copyright © Reuben Thomas 2000-2007<br> Copyright © Shmuel Zeigerman 2004-2007 diff --git a/doc/manual.html b/doc/manual.html index e76a601..3fbd173 100755 --- a/doc/manual.html +++ b/doc/manual.html @@ -4,12 +4,12 @@ <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <meta name="generator" content="Docutils 0.5: http://docutils.sourceforge.net/" /> -<title>Lrexlib 2.1 
Reference Manual</title> +<title>Lrexlib 2.2 Reference Manual</title> <link rel="stylesheet" href="lrexlib.css" type="text/css" /> </head> <body> -<div class="document" id="lrexlib-2-1-reference-manual"> -<h1 class="title">Lrexlib 2.1 Reference Manual</h1> +<div class="document" id="lrexlib-2-2-reference-manual"> +<h1 class="title">Lrexlib 2.2 Reference Manual</h1> <div class="contents topic"> <p class="topic-title first"><a id="table-of-contents" name="table-of-contents">Table of Contents</a></p> @@ -308,8 +308,8 @@ till the subject fails to match.</p> <div class="section"> <h3><a class="toc-backref" href="#id7" id="gsub" name="gsub">gsub</a></h3> <p><tt class="funcdef docutils literal"><span class="pre">rex.gsub</span> <span class="pre">(subj,</span> <span class="pre">patt,</span> <span class="pre">repl,</span> <span class="pre">[n],</span> <span class="pre">[cf],</span> <span class="pre">[ef],</span> <span class="pre">[lo])</span></tt></p> -<p>The function searches for all matches of the pattern <em>patt</em> in the string <em>subj</em> -and substitutes the found matches according to the parameter <em>repl</em> (see details +<p>This function searches for all matches of the pattern <em>patt</em> in the string +<em>subj</em> and replaces them according to the parameters <em>repl</em> and <em>n</em> (see details below).</p> <p>PCRE: A locale <em>lo</em> may be specified.</p> <blockquote> @@ -345,8 +345,8 @@ below).</p> </tr> <tr><td>[n]</td> <td>maximum number of matches to search -for; unlimited if not supplied</td> -<td>number</td> +for, or control function, or nil</td> +<td>number or function</td> <td><tt class="docutils literal"><span class="pre">nil</span></tt></td> </tr> <tr><td>[cf]</td> @@ -372,11 +372,14 @@ for; unlimited if not supplied</td> <dd><ol class="first last arabic simple"> <li>The subject string with the substitutions made.</li> <li>Number of matches found.</li> +<li>Number of substitutions made.</li> </ol> </dd> 
<dt><strong>Details:</strong></dt> -<dd><p class="first">The parameter <em>repl</em> can be either a string, a function or a table. The -function behaves differently depending on the <em>repl</em> type:</p> +<dd><p class="first">The parameter <em>repl</em> can be either a string, a function or a table. +On each match made, it is converted into a value <em>repl_out</em> that may be used +for the replacement.</p> +<p><em>repl_out</em> is generated differently depending on the type of <em>repl</em>:</p> <ol class="arabic simple"> <li>If <em>repl</em> is a <em>string</em> then it is treated as a template for substitution, where the %X occurrences in <em>repl</em> are handled in a special way, depending @@ -398,48 +401,75 @@ string</li> </ul> </li> <li>if X is any non-digit character then %X is substituted by X</li> -<li>all parts of <em>repl</em> other than %X are copied to the output string -verbatim.</li> </ul> +<p>All parts of <em>repl</em> other than %X are copied to <em>repl_out</em> verbatim.</p> </blockquote> <ol class="arabic simple" start="2"> -<li>If <em>repl</em> is a <em>function</em> then it gets called on each match with the +<li>If <em>repl</em> is a <em>function</em> then it is called on each match with the submatches passed as parameters (if there are no submatches then the entire -match is passed as the only parameter). The substitution string is derived -depending on the first return value of function <em>repl</em>:</li> +match is passed as the only parameter). 
<em>repl_out</em> is the return value of +the <em>repl</em> call, and is interpreted as follows:</li> </ol> <blockquote> <ul class="simple"> -<li>if it is a string then it is used as a substitution for the current match.</li> -<li>if it is either of nothing, <tt class="docutils literal"><span class="pre">nil</span></tt> or <tt class="docutils literal"><span class="pre">false</span></tt> then no substitution is -made.</li> -<li>values of other types generate an error.</li> -</ul> -<p>Though <a class="reference" href="#gsub">gsub</a> is in general consistent with the API and behavior of Lua's -<em>string.gsub</em>, it has one extension with regards to <em>string.gsub</em> behavior:</p> -<ul class="simple"> -<li>if function <em>repl</em> returns more than one value and its second return value -is the literal string <em>"break"</em>, then <a class="reference" href="#gsub">gsub</a> stops searching for further -matches in the subject and returns.</li> +<li>if it is a string or a number (coerced to a string), then the replacement +value is that string;</li> +<li>if it is a <tt class="docutils literal"><span class="pre">nil</span></tt> or a <tt class="docutils literal"><span class="pre">false</span></tt>, then no replacement is to be done;</li> </ul> </blockquote> <ol class="arabic simple" start="3"> -<li>If <em>repl</em> is a <em>table</em> then the first submatch (or the entire match if -there are no submatches) is used as the key and the value stored in <em>repl</em> -under that key is used for substitution depending on its type.</li> +<li>If <em>repl</em> is a table then <em>repl_out</em> is <em>repl</em> [m1], where m1 is the first +submatch (or the entire match if there are no submatches), following the +same rules as for the return value of <em>repl</em> call, described in the above +paragraph.</li> </ol> -<blockquote class="last"> +<p>Note: Under some circumstances, the value of <em>repl_out</em> may be ignored; see +<a class="reference" 
href="#below">below</a>.</p> +<p>gsub behaves differently depending on the type of <em>n</em>:</p> +<ol class="last arabic simple"> +<li>If <em>n</em> is a <em>number</em> then it is treated as the maximum number of matches +to search for (an omitted or <tt class="docutils literal"><span class="pre">nil</span></tt> value means an unlimited number of +matches). On each match, the replacement value is the <em>repl_out</em> string +(see above).</li> +</ol> +</dd> +</dl> +<blockquote id="below"> +<ol class="arabic" start="2"> +<li><p class="first">If <em>n</em> is a function, then it is called on each match, after <em>repl_out</em> is +produced (so if <em>repl</em> is a function, it will be called prior to the <em>n</em> +call).</p> +<p><em>n</em> receives 3 arguments and returns 2 values. Its arguments are:</p> +<blockquote> +<ol class="arabic simple"> +<li>The start offset of the match (a number)</li> +<li>The end offset of the match (a number)</li> +<li><em>repl_out</em></li> +</ol> +</blockquote> +<p>The type of its first return controls the replacement produced by gsub for +the current match:</p> +<blockquote> <ul class="simple"> -<li>If no value is stored under the key but <em>repl</em> has a metatable with the -<em>__index</em> field set then the correspondent metamethod will be called for -obtaining the value.</li> -<li>The obtained value is used for the substitution following exactly same -rules as for the first return value of <em>repl</em> described in the above -paragraph.</li> +<li><tt class="docutils literal"><span class="pre">true</span></tt> -- replace/don't replace, according to <em>repl_out</em>;</li> +<li><tt class="docutils literal"><span class="pre">nil</span></tt>/<tt class="docutils literal"><span class="pre">false</span></tt> -- don't replace;</li> +<li>a string (or a number coerced to a string) -- replace by that string;</li> </ul> </blockquote> -</dd> -</dl> +<p>The type of its second return controls gsub behavior after the current +match is 
handled:</p> +<blockquote> +<ul class="simple"> +<li><tt class="docutils literal"><span class="pre">nil</span></tt>/<tt class="docutils literal"><span class="pre">false</span></tt> -- no changes: <em>n</em> will be called on the next match;</li> +<li><tt class="docutils literal"><span class="pre">true</span></tt> -- search for an unlimited number of matches; <em>n</em> will not be +called anymore;</li> +<li>a number -- maximum number of matches to search for, beginning from the +next match; <em>n</em> will not be called anymore;</li> +</ul> +</blockquote> +</li> +</ol> +</blockquote> </div> <hr class="docutils" /> <div class="section"> @@ -678,7 +708,7 @@ documentation.</p> <hr class="docutils" /> <div class="section"> <h3><a class="toc-backref" href="#id12" id="config" name="config">config</a></h3> -<p>[PCRE only. See <em>pcre_config</em> in the <a class="reference" href="http://www.pcre.org/pcre.txt">PCRE</a> docs.]</p> +<p>[PCRE 4.0 and later. See <em>pcre_config</em> in the <a class="reference" href="http://www.pcre.org/pcre.txt">PCRE</a> docs.]</p> <p><tt class="funcdef docutils literal"><span class="pre">rex.config</span> <span class="pre">([tb])</span></tt></p> <p>This function returns a table containing the values of the configuration parameters used at PCRE library build-time. 
Those parameters (numbers) are @@ -956,7 +986,7 @@ and 25 then the function returns the following: 10, { 25,20,15 }, 3.</dd> <hr class="docutils" /> <div class="section"> <h2><a class="toc-backref" href="#id18" id="incompatibilities-with-the-previous-versions" name="incompatibilities-with-the-previous-versions">Incompatibilities with the Previous Versions</a></h2> -<p><strong>The following changes are incompatible with Lrexlib version 1.19:</strong></p> +<p><strong>Incompatibilities with the version 1.19:</strong></p> <blockquote> <ol class="arabic simple"> <li>Lua 5.1 is required</li> @@ -970,7 +1000,7 @@ and 25 then the function returns the following: 10, { 25,20,15 }, 3.</dd> subpatterns</em> (PCRE only)</li> </ol> </blockquote> -<p><strong>The following changes are incompatible with Lrexlib version 2.0:</strong></p> +<p><strong>Incompatibilities with the version 2.0:</strong></p> <blockquote> <ol class="arabic simple"> <li><a class="reference" href="#match">match</a>, <a class="reference" href="#find">find</a>, <a class="reference" href="#tfind">tfind</a>, <a class="reference" href="#exec">exec</a>, <a class="reference" href="#dfa-exec">dfa_exec</a>: only one value (a <tt class="docutils literal"><span class="pre">nil</span></tt>) is @@ -978,11 +1008,17 @@ returned when the subject does not match the pattern. Any other failure generates an error.</li> </ol> </blockquote> +<p><strong>Incompatibilities with the version 2.1:</strong></p> +<blockquote> +<ol class="arabic simple"> +<li><a class="reference" href="#gsub">gsub</a>: a special "break" return of <em>repl</em> function is deprecated.</li> +</ol> +</blockquote> </div> </div> <div class="footer"> <hr class="footer" /> -Generated on: 2007-02-12 18:37 UTC. +Generated on: 2007-03-17 09:55 UTC. </div> </body> diff --git a/doc/manual.txt b/doc/manual.txt index b2e4f52..e9bca04 100755 --- a/doc/manual.txt +++ b/doc/manual.txt @@ -1,6 +1,6 @@ .. 
role:: funcdef(literal) -Lrexlib 2.1 Reference Manual +Lrexlib 2.2 Reference Manual ============================ .. contents:: Table of Contents @@ -184,8 +184,8 @@ gsub :funcdef:`rex.gsub (subj, patt, repl, [n], [cf], [ef], [lo])` -The function searches for all matches of the pattern *patt* in the string *subj* -and substitutes the found matches according to the parameter *repl* (see details +This function searches for all matches of the pattern *patt* in the string +*subj* and replaces them according to the parameters *repl* and *n* (see details below). PCRE: A locale *lo* may be specified. @@ -199,8 +199,8 @@ PCRE: A locale *lo* may be specified. +---------+-----------------------------------+-------------------------+-------------+ | repl |substitution source |string, function or table| n/a | +---------+-----------------------------------+-------------------------+-------------+ - | [n] |maximum number of matches to search| number | ``nil`` | - | |for; unlimited if not supplied | | | + | [n] |maximum number of matches to search| number or function | ``nil`` | + | |for, or control function, or nil | | | +---------+-----------------------------------+-------------------------+-------------+ | [cf] |compilation flags (bitwise OR) | number | cf_ | +---------+-----------------------------------+-------------------------+-------------+ @@ -212,10 +212,14 @@ PCRE: A locale *lo* may be specified. **Returns:** 1. The subject string with the substitutions made. 2. Number of matches found. + 3. Number of substitutions made. **Details:** - The parameter *repl* can be either a string, a function or a table. The - function behaves differently depending on the *repl* type: + The parameter *repl* can be either a string, a function or a table. + On each match made, it is converted into a value *repl_out* that may be used + for the replacement. + + *repl_out* is generated differently depending on the type of *repl*: 1. 
If *repl* is a *string* then it is treated as a template for substitution, where the %X occurences in *repl* are handled in a special way, depending @@ -235,37 +239,60 @@ PCRE: A locale *lo* may be specified. string * if X is any non-digit character then %X is substituted by X - * all parts of *repl* other than %X are copied to the output string - verbatim. - 2. If *repl* is a *function* then it gets called on each match with the + All parts of *repl* other than %X are copied to *repl_out* verbatim. + + 2. If *repl* is a *function* then it is called on each match with the submatches passed as parameters (if there are no submatches then the entire - match is passed as the only parameter). The substitution string is derived - depending on the first return value of function *repl*: + match is passed as the only parameter). *repl_out* is the return value of + the *repl* call, and is interpreted as follows: + + * if it is a string or a number (coerced to a string), then the replacement + value is that string; + * if it is a ``nil`` or a ``false``, then no replacement is to be done; + + 3. If *repl* is a table then *repl_out* is *repl* [m1], where m1 is the first + submatch (or the entire match if there are no submatches), following the + same rules as for the return value of *repl* call, described in the above + paragraph. + + Note: Under some circumstances, the value of *repl_out* may be ignored; see + below_. + + gsub behaves differently depending on the type of *n*: - * if it is a string then it is used as a substitution for the current match. - * if it is either of nothing, ``nil`` or ``false`` then no substitution is - made. - * values of other types generate an error. + 1. If *n* is a *number* then it is treated as the maximum number of matches + to search for (an omitted or ``nil`` value means an unlimited number of + matches). On each match, the replacement value is the *repl_out* string + (see above). 
- Though gsub_ is in general consistent with the API and behavior of Lua's - *string.gsub*, it has one extension with regards to *string.gsub* behavior: +.. _below: - * if function *repl* returns more than one value and its second return value - is the literal string *"break"*, then gsub_ stops searching for further - matches in the subject and returns. + 2. If *n* is a function, then it is called on each match, after *repl_out* is + produced (so if *repl* is a function, it will be called prior to the *n* + call). - 3. If *repl* is a *table* then the first submatch (or the entire match if - there are no submatches) is used as the key and the value stored in *repl* - under that key is used for substitution depending on its type. + *n* receives 3 arguments and returns 2 values. Its arguments are: - * If no value is stored under the key but *repl* has a metatable with the - *__index* field set then the correspondent metamethod will be called for - obtaining the value. + 1. The start offset of the match (a number) + 2. The end offset of the match (a number) + 3. *repl_out* - * The obtained value is used for the substitution following exactly same - rules as for the first return value of *repl* described in the above - paragraph. 
+ The type of its first return controls the replacement produced by gsub for + the current match: + + * ``true`` -- replace/don't replace, according to *repl_out*; + * ``nil``/``false`` -- don't replace; + * a string (or a number coerced to a string) -- replace by that string; + + The type of its second return controls gsub behavior after the current + match is handled: + + * ``nil``/``false`` -- no changes: *n* will be called on the next match; + * ``true`` -- search for an unlimited number of matches; *n* will not be + called anymore; + * a number -- maximum number of matches to search for, beginning from the + next match; *n* will not be called anymore; ------------------------------------------------------------ @@ -407,7 +434,7 @@ documentation. config ------ -[PCRE only. See *pcre_config* in the PCRE_ docs.] +[PCRE 4.0 and later. See *pcre_config* in the PCRE_ docs.] :funcdef:`rex.config ([tb])` @@ -573,7 +600,7 @@ string *subj*, using a DFA matching algorithm. Incompatibilities with the Previous Versions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -**The following changes are incompatible with Lrexlib version 1.19:** +**Incompatibilities with the version 1.19:** 1. Lua 5.1 is required #. Functions ``newPCRE`` and ``newPOSIX`` renamed to new_ @@ -585,9 +612,13 @@ Incompatibilities with the Previous Versions #. Method exec_: the returned table may additionally contain *named subpatterns* (PCRE only) -**The following changes are incompatible with Lrexlib version 2.0:** +**Incompatibilities with the version 2.0:** 1. match_, find_, tfind_, exec_, dfa_exec_: only one value (a ``nil``) is returned when the subject does not match the pattern. Any other failure generates an error. +**Incompatibilities with the version 2.1:** + + 1. gsub_: a special "break" return of *repl* function is deprecated. 
+ diff --git a/src/common.c b/src/common.c index 1b554ac..7b5736b 100755 --- a/src/common.c +++ b/src/common.c @@ -81,45 +81,15 @@ void CheckStack (lua_State *L, int extraslots) } int OptLimit (lua_State *L, int pos) { - if (!lua_isnoneornil (L, pos)) { - int a = luaL_checkint (L, pos); + if (lua_isnoneornil (L, pos)) + return GSUB_UNLIMITED; + if (lua_isfunction (L, pos)) + return GSUB_CONDITIONAL; + if (lua_isnumber (L, pos)) { + int a = lua_tointeger (L, pos); return a < 0 ? 0 : a; } - return -1; -} - -/* function plainfind (s, p, [st], [ci]) */ -int plainfind_func (lua_State *L) { - size_t textlen, patlen; - const char *text = luaL_checklstring (L, 1, &textlen); - const char *pattern = luaL_checklstring (L, 2, &patlen); - const char *from = text + get_startoffset (L, 3, textlen); - int ci = lua_toboolean (L, 4); - const char *end = text + textlen; - - for (; from + patlen <= end; ++from) { - const char *f = from, *p = pattern; - size_t len = patlen + 1; - if (ci) { - while (--len) { - if (toupper (*f++) != toupper (*p++)) - break; - } - } - else { - while (--len) { - if (*f++ != *p++) - break; - } - } - if (len == 0) { - lua_pushinteger (L, from - text + 1); - lua_pushinteger (L, from - text + patlen); - return 2; - } - } - lua_pushnil (L); - return 1; + return luaL_argerror (L, pos, "number or function expected"); } /* Classes */ @@ -184,10 +154,18 @@ void buffer_free (TBuffer *buf) { free (buf->arr); } +void buffer_clear (TBuffer *buf) { + buf->top = 0; +} + void buffer_pushresult (TBuffer *buf) { lua_pushlstring (buf->L, buf->arr, buf->top); } +void buffer_addbuffer (TBuffer *trg, TBuffer *src) { + buffer_addlstring (trg, src->arr, src->top); +} + void buffer_addlstring (TBuffer *buf, const void *src, size_t sz) { size_t newtop = buf->top + sz; if (newtop > buf->size) { @@ -209,14 +187,14 @@ void buffer_addvalue (TBuffer *buf, int stackpos) { buffer_addlstring (buf, p, len); } -void bufferZ_addlstring (TBuffer *buf, const void *src, size_t len) { +static 
void bufferZ_addlstring (TBuffer *buf, const void *src, size_t len) { size_t header[2] = { ID_STRING }; header[1] = len; buffer_addlstring (buf, header, sizeof (header)); buffer_addlstring (buf, src, len); } -void bufferZ_addnum (TBuffer *buf, size_t num) { +static void bufferZ_addnum (TBuffer *buf, size_t num) { size_t header[2] = { ID_NUMBER }; header[1] = num; buffer_addlstring (buf, header, sizeof (header)); diff --git a/src/common.h b/src/common.h index 0ee23cd..f9abf2c 100755 --- a/src/common.h +++ b/src/common.h @@ -6,7 +6,7 @@ #include "lua.h" -#define REX_VERSION "Lrexlib 2.1.0" +#define REX_VERSION "Lrexlib 2.2.0 beta" /* REX_API can be overridden from the command line or Makefile */ #ifndef REX_API @@ -24,10 +24,29 @@ int get_startoffset (lua_State *L, int stackpos, size_t len); void *Lmalloc (lua_State *L, size_t size); void CheckStack (lua_State *L, int extraslots); int OptLimit (lua_State *L, int pos); -int plainfind_func (lua_State *L); /* Classes */ +typedef struct { /* compile arguments */ + const char * pattern; + size_t patlen; + int cflags; + const char * locale; +} TArgComp; + +typedef struct { /* exec arguments */ + const char * text; + size_t textlen; + int startoffset; + int eflags; + int funcpos; + int maxmatch; + int funcpos2; /* used with gsub */ + int reptype; /* used with gsub */ + size_t ovecsize; /* used with dfa_exec */ + size_t wscount; /* used with dfa_exec */ +} TArgExec; + struct tagFreeList; /* forward declaration */ struct tagBuffer { @@ -52,6 +71,8 @@ void freelist_free (TFreeList *fl); void buffer_init (TBuffer *buf, size_t sz, lua_State *L, TFreeList *fl); void buffer_free (TBuffer *buf); +void buffer_clear (TBuffer *buf); +void buffer_addbuffer (TBuffer *trg, TBuffer *src); void buffer_addlstring (TBuffer *buf, const void *src, size_t sz); void buffer_addvalue (TBuffer *buf, int stackpos); void buffer_pushresult (TBuffer *buf); @@ -59,4 +80,10 @@ void buffer_pushresult (TBuffer *buf); void bufferZ_putrepstring (TBuffer 
*buf, int reppos, int nsub); int bufferZ_next (TBuffer *buf, size_t *iter, size_t *len, const char **str); +/* These are the special values for maxmatch in gsub. + * They all must be negative. + */ +#define GSUB_UNLIMITED -1 +#define GSUB_CONDITIONAL -2 + #endif diff --git a/src/lpcre.c b/src/lpcre.c index 7e34b77..c313465 100755 --- a/src/lpcre.c +++ b/src/lpcre.c @@ -12,7 +12,7 @@ #include "common.h" extern int Lpcre_get_flags (lua_State *L); extern int Lpcre_config (lua_State *L); -extern int generate_error (lua_State *L, int err_code); +extern flag_pair pcre_error_flags[]; /* These 2 settings may be redefined from the command-line or the makefile. * They should be kept in sync between themselves and with the target name. @@ -24,8 +24,8 @@ extern int generate_error (lua_State *L, int err_code); # define REX_OPENLIB luaopen_rex_pcre #endif -const char pcre_typename[] = REX_LIBNAME"_regex"; -const char *pcre_handle = pcre_typename; +#define CFLAGS_DEFAULT 0 +#define EFLAGS_DEFAULT 0 #define CODE_NOMATCH PCRE_ERROR_NOMATCH #define IS_MATCH(res) ((res) >= 0) @@ -45,9 +45,15 @@ const char *pcre_handle = pcre_typename; #define PUSH_END(L,ud,offs,n) lua_pushinteger(L, (offs) + SUB_END(ud,n)) #define PUSH_OFFSETS(L,ud,offs,n) (PUSH_START(L,ud,offs,n), PUSH_END(L,ud,offs,n)) -/* Data Types - ****************************************************************************** - */ +#define BASE(st) 0 +#define PULL(st,from) (st = (from)) +#define OPTLOCALE(trg,L,pos) (trg = luaL_optstring (L, pos, NULL)) +#if PCRE_MAJOR >= 4 +# define DO_NAMED_SUBPATTERNS do_named_subpatterns +#else +# define DO_NAMED_SUBPATTERNS(L,ud,text) ((void)L) +#endif + typedef struct { pcre * pr; @@ -58,94 +64,36 @@ typedef struct { int freed; } TPcre; -typedef struct { /* pcre_compile arguments */ - const char * pattern; - size_t patlen; - int cflags; - const char * locale; -} TArgComp; - -typedef struct { /* pcre_exec arguments */ - TPcre * ud; - const char * text; - size_t textlen; - int startoffset; 
- int eflags; - int funcpos; - int maxmatch; - int reptype; /* used with gsub */ - size_t ovecsize; /* used with dfa_exec */ - size_t wscount; /* used with dfa_exec */ -} TArgExec; +#define TUserdata TPcre +#include "algo.h" + +const char pcre_typename[] = REX_LIBNAME"_regex"; +const char *pcre_handle = pcre_typename; /* Functions ****************************************************************************** */ -static TPcre* check_ud (lua_State *L, int stackpos) { - return (TPcre *)luaL_checkudata (L, stackpos, pcre_handle); -} - -static void checkarg_new (lua_State *L, TArgComp *argC) { - argC->pattern = luaL_checkstring (L, 1); - argC->cflags = luaL_optint (L, 2, 0); - argC->locale = luaL_optstring (L, 3, NULL); -} - -/* function gsub (s, patt, f, [n], [cf], [ef], [lo]) */ -static void checkarg_gsub (lua_State *L, TArgComp *argC, TArgExec *argE) { - argE->text = luaL_checklstring (L, 1, &argE->textlen); - argC->pattern = luaL_checkstring (L, 2); - lua_tostring (L, 3); /* converts number (if any) to string */ - argE->reptype = lua_type (L, 3); - if (argE->reptype != LUA_TSTRING && argE->reptype != LUA_TTABLE && - argE->reptype != LUA_TFUNCTION) { - luaL_argerror (L, 3, "must be string, table or function"); - } - argE->funcpos = 3; - argE->maxmatch = OptLimit (L, 4); - argC->cflags = luaL_optint (L, 5, 0); - argE->eflags = luaL_optint (L, 6, 0); - argC->locale = luaL_optstring (L, 7, NULL); -} - -/* method r:tfind (s, [st], [ef]) */ -/* method r:exec (s, [st], [ef]) */ -static void checkarg_tfind (lua_State *L, TArgExec *argE) { - argE->ud = check_ud (L, 1); - argE->text = luaL_checklstring (L, 2, &argE->textlen); - argE->startoffset = get_startoffset (L, 3, argE->textlen); - argE->eflags = luaL_optint (L, 4, 0); -} - -/* function find (s, patt, [st], [cf], [ef], [lo]) */ -/* function match (s, patt, [st], [cf], [ef], [lo]) */ -static void checkarg_find_f (lua_State *L, TArgComp *argC, TArgExec *argE) { - argE->text = luaL_checklstring (L, 1, &argE->textlen); - 
argC->pattern = luaL_checkstring (L, 2); - argE->startoffset = get_startoffset (L, 3, argE->textlen); - argC->cflags = luaL_optint (L, 4, 0); - argE->eflags = luaL_optint (L, 5, 0); - argC->locale = luaL_optstring (L, 6, NULL); +static int generate_error (lua_State *L, const TPcre *ud, int errcode) { + const char *key = get_flag_key (pcre_error_flags, errcode); + (void) ud; + if (key) + return luaL_error (L, "error PCRE_%s", key); + else + return luaL_error (L, "PCRE error code %d", errcode); } -/* function gmatch (s, patt, [cf], [ef], [lo]) */ -/* function split (s, patt, [cf], [ef], [lo]) */ -static void checkarg_gmatch_split (lua_State *L, TArgComp *argC, TArgExec *argE) { - argE->text = luaL_checklstring (L, 1, &argE->textlen); - argC->pattern = luaL_checkstring (L, 2); - argC->cflags = luaL_optint (L, 3, 0); - argE->eflags = luaL_optint (L, 4, 0); - argC->locale = luaL_optstring (L, 5, NULL); +static TPcre* check_ud (lua_State *L, int stackpos) { + return (TPcre *)luaL_checkudata (L, stackpos, pcre_handle); } #if PCRE_MAJOR >= 6 /* method r:dfa_exec (s, [st], [ef], [ovecsize], [wscount]) */ -static void checkarg_dfa_exec (lua_State *L, TArgExec *argE) { - argE->ud = check_ud (L, 1); +static void checkarg_dfa_exec (lua_State *L, TArgExec *argE, TPcre **ud) { + *ud = check_ud (L, 1); argE->text = luaL_checklstring (L, 2, &argE->textlen); argE->startoffset = get_startoffset (L, 3, argE->textlen); - argE->eflags = luaL_optint (L, 4, 0); + argE->eflags = luaL_optint (L, 4, EFLAGS_DEFAULT); argE->ovecsize = luaL_optint (L, 5, 100); argE->wscount = luaL_optint (L, 6, 50); } @@ -191,49 +139,7 @@ static int compile_regex (lua_State *L, const TArgComp *argC, TPcre **pud) { return 1; } -static int Lpcre_new (lua_State *L) { - TArgComp argC; - checkarg_new (L, &argC); - return compile_regex (L, &argC, NULL); -} - -static void push_substrings (lua_State *L, TPcre *ud, const char *text) { - int i; - CheckStack (L, NSUB(ud)); - for (i = 1; i <= NSUB(ud); i++) { - 
PUSH_SUB_OR_FALSE (L, ud, text, i); - } -} - -static void push_substring_table (lua_State *L, TPcre *ud, const char *text) { - int i; - lua_newtable (L); - for (i = 1; i <= NSUB(ud); i++) { - PUSH_SUB_OR_FALSE (L, ud, text, i); - lua_rawseti (L, -2, i); - } -} - -static void push_offset_table (lua_State *L, const int *offsets, int nmax) { - int i, j, k; - lua_newtable (L); - for (i=1, j=1; i <= nmax; i++) { - k = i * 2; - if (offsets[k] >= 0) { - lua_pushinteger (L, offsets[k] + 1); - lua_rawseti (L, -2, j++); - lua_pushinteger (L, offsets[k+1]); - lua_rawseti (L, -2, j++); - } - else { - lua_pushboolean (L, 0); - lua_rawseti (L, -2, j++); - lua_pushboolean (L, 0); - lua_rawseti (L, -2, j++); - } - } -} - +#if PCRE_MAJOR >= 4 /* the target table must be on lua stack top */ static void do_named_subpatterns (lua_State *L, TPcre *ud, const char *text) { int i, namecount, name_entry_size; @@ -256,45 +162,27 @@ static void do_named_subpatterns (lua_State *L, TPcre *ud, const char *text) { tabptr += name_entry_size; } } +#endif /* #if PCRE_MAJOR >= 4 */ -static int generic_tfind (lua_State *L, int tfind) { - TPcre *ud; - TArgExec argE; - int res; - - checkarg_tfind (L, &argE); - ud = argE.ud; - res = pcre_exec (ud->pr, ud->extra, argE.text, (int)argE.textlen, - argE.startoffset, argE.eflags, ud->match, - (ud->ncapt + 1) * 3); - if (IS_MATCH (res)) { - PUSH_OFFSETS (L, ud, 0, 0); - if (tfind) - push_substring_table (L, ud, argE.text); - else - push_offset_table (L, ud->match, ud->ncapt); - do_named_subpatterns (L, ud, argE.text); - return 3; - } - else if (res == CODE_NOMATCH) - return lua_pushnil (L), 1; - else - return generate_error(L, res); +static int tfind_exec (TPcre *ud, TArgExec *argE) { + return pcre_exec (ud->pr, ud->extra, argE->text, (int)argE->textlen, + argE->startoffset, argE->eflags, ud->match, (ud->ncapt + 1) * 3); } #if PCRE_MAJOR >= 6 static int Lpcre_dfa_exec (lua_State *L) { TArgExec argE; + TPcre *ud; int res; int *buf, *ovector, *wspace; - 
checkarg_dfa_exec (L, &argE); + checkarg_dfa_exec (L, &argE, &ud); buf = (int*) Lmalloc (L, (argE.ovecsize + argE.wscount) * sizeof(int)); ovector = buf; wspace = buf + argE.ovecsize; - res = pcre_dfa_exec (argE.ud->pr, argE.ud->extra, argE.text, (int)argE.textlen, + res = pcre_dfa_exec (ud->pr, ud->extra, argE.text, (int)argE.textlen, argE.startoffset, argE.eflags, ovector, argE.ovecsize, wspace, argE.wscount); if (IS_MATCH (res) || res == PCRE_ERROR_PARTIAL) { @@ -315,250 +203,33 @@ static int Lpcre_dfa_exec (lua_State *L) if (res == CODE_NOMATCH) return lua_pushnil (L), 1; else - return generate_error (L, res); + return generate_error (L, ud, res); } } #endif /* #if PCRE_MAJOR >= 6 */ -static int Lpcre_tfind (lua_State *L) { - return generic_tfind (L, 1); -} - -static int Lpcre_exec (lua_State *L) { - return generic_tfind (L, 0); -} - -static int gmatch_iter (lua_State *L) { - int res; - size_t textlen; - TPcre *ud = (TPcre*) lua_touserdata (L, lua_upvalueindex (1)); - const char *text = lua_tolstring (L, lua_upvalueindex (2), &textlen); - int eflags = lua_tointeger (L, lua_upvalueindex (3)); - int startoffset = lua_tointeger (L, lua_upvalueindex (4)); - - if (startoffset > (int)textlen) - return 0; - res = pcre_exec (ud->pr, ud->extra, text, textlen, startoffset, eflags, - ud->match, (NSUB(ud) + 1) * 3); - if (IS_MATCH (res)) { - int incr = (SUB_LEN(ud,0) == 0) ? 
1 : 0; /* prevent endless loop */ - PUSH_END (L, ud, incr, 0); /* update start offset */ - lua_replace (L, lua_upvalueindex (4)); - /* push either captures or entire match */ - if (NSUB(ud)) { - push_substrings (L, ud, text); - return NSUB(ud); - } - else { - PUSH_SUB (L, ud, text, 0); - return 1; - } - } - else if (res == CODE_NOMATCH) - return 0; - else - return generate_error(L, res); +static int gmatch_exec (TUserdata *ud, TArgExec *argE) { + return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, + argE->startoffset, argE->eflags, ud->match, (NSUB(ud) + 1) * 3); } -static int split_iter (lua_State *L) { - int res; - size_t textlen; - TPcre *ud = (TPcre*) lua_touserdata (L, lua_upvalueindex (1)); - const char *text = lua_tolstring (L, lua_upvalueindex (2), &textlen); - int eflags = lua_tointeger (L, lua_upvalueindex (3)); - int startoffset = lua_tointeger (L, lua_upvalueindex (4)); - int newoffset; - - if (startoffset >= (int)textlen) - return 0; - for (newoffset = startoffset; newoffset < (int)textlen; ++newoffset) { - res = pcre_exec (ud->pr, ud->extra, text, textlen, newoffset, eflags, - ud->match, (NSUB(ud) + 1) * 3); - if (IS_MATCH (res)) { - if (SUB_LEN(ud,0)) { - PUSH_END (L, ud, 0, 0); /* update start offset */ - lua_replace (L, lua_upvalueindex (4)); - /* push text preceding the match */ - lua_pushlstring (L, text + startoffset, SUB_BEG(ud,0) - startoffset); - /* push either captures or entire match */ - if (NSUB(ud)) { - push_substrings (L, ud, text); - return 1 + NSUB(ud); - } - else { - PUSH_SUB (L, ud, text, 0); - return 2; - } - } - } - else if (res == CODE_NOMATCH) - break; - else - return generate_error (L, res); - } - lua_pushinteger (L, textlen); /* mark as last iteration */ - lua_replace (L, lua_upvalueindex (4)); /* update start offset */ - lua_pushlstring (L, text + startoffset, textlen - startoffset); - return 1; +static void gmatch_pushsubject (lua_State *L, TArgExec *argE) { + lua_pushlstring (L, argE->text, argE->textlen); } 
-static int generic_gmatch (lua_State *L, lua_CFunction iter) -{ - TArgComp argC; - TArgExec argE; - checkarg_gmatch_split (L, &argC, &argE); - compile_regex (L, &argC, &argE.ud); /* 1-st upvalue: ud */ - lua_pushlstring (L, argE.text, argE.textlen); /* 2-nd upvalue: s */ - lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */ - lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */ - lua_pushcclosure (L, iter, 4); - return 1; +static int findmatch_exec (TPcre *ud, TArgExec *argE) { + return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, + argE->startoffset, argE->eflags, ud->match, (NSUB(ud) + 1) * 3); } -static int Lpcre_gmatch (lua_State *L) { - return generic_gmatch (L, gmatch_iter); +static int gsub_exec (TPcre *ud, TArgExec *argE, int st) { + return pcre_exec (ud->pr, ud->extra, argE->text, (int)argE->textlen, + st, argE->eflags, ud->match, (NSUB(ud) + 1) * 3); } -static int Lpcre_split (lua_State *L) { - return generic_gmatch (L, split_iter); -} - -static int generic_find (lua_State *L, int find) { - int res; - TPcre *ud; - TArgComp argC; - TArgExec argE; - - checkarg_find_f (L, &argC, &argE); - compile_regex (L, &argC, &ud); - res = pcre_exec (ud->pr, ud->extra, argE.text, argE.textlen, argE.startoffset, - argE.eflags, ud->match, (NSUB(ud) + 1) * 3); - if (IS_MATCH (res)) { - if (find) - PUSH_OFFSETS (L, ud, 0, 0); - if (NSUB(ud)) /* push captures */ - push_substrings (L, ud, argE.text); - else if (!find) { - PUSH_SUB (L, ud, argE.text, 0); - return 1; - } - return find ? 
NSUB(ud) + 2 : NSUB(ud); - } - else if (res == CODE_NOMATCH) - return lua_pushnil (L), 1; - else - return generate_error (L, res); -} - -static int Lpcre_find (lua_State *L) { - return generic_find (L, 1); -} - -static int Lpcre_match (lua_State *L) { - return generic_find (L, 0); -} - -static int Lpcre_gsub (lua_State *L) { - TPcre *ud; - TArgComp argC; - TArgExec argE; - int reps = 0, st = 0; - TBuffer BufOut, BufRep; - TFreeList freelist; - /*--------------------------------------------------------------------------*/ - checkarg_gsub (L, &argC, &argE); - compile_regex (L, &argC, &ud); - freelist_init (&freelist); - /*--------------------------------------------------------------------------*/ - if (argE.reptype == LUA_TSTRING) { - buffer_init (&BufRep, 256, L, &freelist); - bufferZ_putrepstring (&BufRep, argE.funcpos, NSUB(ud)); - } - else if (argE.reptype == LUA_TFUNCTION) - lua_pushliteral (L, "break"); - /*--------------------------------------------------------------------------*/ - buffer_init (&BufOut, 1024, L, &freelist); - while ((argE.maxmatch < 0 || reps < argE.maxmatch) && st <= (int)argE.textlen) { - int from, to, res; - res = pcre_exec (ud->pr, ud->extra, argE.text, (int)argE.textlen, st, - argE.eflags, ud->match, (NSUB(ud) + 1) * 3); - if (res == CODE_NOMATCH) - break; - else if (!IS_MATCH (res)) { - freelist_free (&freelist); - return generate_error (L, res); - } - ++reps; - from = SUB_BEG(ud,0); - to = SUB_END(ud,0); - if (from > st) - buffer_addlstring (&BufOut, argE.text + st, from - st); - /*------------------------------------------------------------------------*/ - if (argE.reptype == LUA_TSTRING) { - size_t iter = 0, num; - const char *str; - while (bufferZ_next (&BufRep, &iter, &num, &str)) { - if (str) - buffer_addlstring (&BufOut, str, num); - else if (num == 0 || SUB_VALID (ud,num)) - buffer_addlstring (&BufOut, argE.text + SUB_BEG(ud,num), SUB_LEN(ud,num)); - } - } - 
/*------------------------------------------------------------------------*/ - else if (argE.reptype == LUA_TTABLE) { - if (NSUB(ud) > 0) - PUSH_SUB_OR_FALSE (L, ud, argE.text, 1); - else - lua_pushlstring (L, argE.text + from, to - from); - lua_gettable (L, argE.funcpos); - } - /*------------------------------------------------------------------------*/ - else if (argE.reptype == LUA_TFUNCTION) { - int narg; - lua_pushvalue (L, argE.funcpos); - if (NSUB(ud) > 0) { - push_substrings (L, ud, argE.text); - narg = NSUB(ud); - } - else { - lua_pushlstring (L, argE.text + from, to - from); - narg = 1; - } - if (0 != lua_pcall (L, narg, 2, 0)) { - freelist_free (&freelist); - return lua_error (L); /* re-raise the error */ - } - } - /*------------------------------------------------------------------------*/ - if (argE.reptype != LUA_TSTRING) { - int pos = (argE.reptype == LUA_TFUNCTION) ? -2 : -1; - if (lua_tostring (L, pos)) - buffer_addvalue (&BufOut, pos); - else if (!lua_toboolean (L, pos)) - buffer_addlstring (&BufOut, argE.text + from, to - from); - else { - freelist_free (&freelist); - luaL_error (L, "invalid replacement value (a %s)", luaL_typename (L, pos)); - } - if (argE.reptype == LUA_TFUNCTION && lua_equal (L, -1, -3)) - argE.maxmatch = 0; /* signal break from the loop */ - lua_pop (L, -pos); - } - /*------------------------------------------------------------------------*/ - if (from < to) - st = to; - else if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */ - buffer_addlstring (&BufOut, argE.text + st, 1); - ++st; - } - else break; - } - /*--------------------------------------------------------------------------*/ - buffer_addlstring (&BufOut, argE.text + st, argE.textlen - st); - buffer_pushresult (&BufOut); - lua_pushinteger (L, reps); - freelist_free (&freelist); - return 2; +static int split_exec (TPcre *ud, TArgExec *argE, int offset) { + return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, offset, + argE->eflags, 
ud->match, (NSUB(ud) + 1) * 3); } static int Lpcre_gc (lua_State *L) { @@ -588,8 +259,8 @@ static int Lpcre_version (lua_State *L) { } static const luaL_reg pcremeta[] = { - { "exec", Lpcre_exec }, - { "tfind", Lpcre_tfind }, /* old match */ + { "exec", ud_exec }, + { "tfind", ud_tfind }, /* old match */ #if PCRE_MAJOR >= 6 { "dfa_exec", Lpcre_dfa_exec }, #endif @@ -599,16 +270,18 @@ static const luaL_reg pcremeta[] = { }; static const luaL_reg rexlib[] = { - { "match", Lpcre_match }, - { "find", Lpcre_find }, - { "gmatch", Lpcre_gmatch }, - { "gsub", Lpcre_gsub }, - { "split", Lpcre_split }, - { "new", Lpcre_new }, + { "match", match }, + { "find", find }, + { "gmatch", gmatch }, + { "gsub", gsub }, + { "split", split }, + { "new", ud_new }, { "plainfind", plainfind_func }, { "flags", Lpcre_get_flags }, - { "config", Lpcre_config }, { "version", Lpcre_version }, +#if PCRE_MAJOR >= 4 + { "config", Lpcre_config }, +#endif { NULL, NULL } }; diff --git a/src/lpcre_f.c b/src/lpcre_f.c index b19aece..e474c74 100755 --- a/src/lpcre_f.c +++ b/src/lpcre_f.c @@ -82,7 +82,7 @@ static flag_pair pcre_flags[] = { { NULL, 0 } }; -static flag_pair pcre_error_flags[] = { +flag_pair pcre_error_flags[] = { { "ERROR_NOMATCH", PCRE_ERROR_NOMATCH }, { "ERROR_NULL", PCRE_ERROR_NULL }, { "ERROR_BADOPTION", PCRE_ERROR_BADOPTION }, @@ -147,6 +147,7 @@ int Lpcre_get_flags (lua_State *L) { return get_flags (L, fps); } +#if PCRE_MAJOR >= 4 int Lpcre_config (lua_State *L) { int val; flag_pair *fp; @@ -162,12 +163,5 @@ int Lpcre_config (lua_State *L) { } return 1; } - -int generate_error (lua_State *L, int errcode) { - const char *key = get_flag_key (pcre_error_flags, errcode); - if (key) - return luaL_error (L, "error PCRE_%s", key); - else - return luaL_error (L, "PCRE error code %d", errcode); -} +#endif diff --git a/src/lposix.c b/src/lposix.c index fba8052..b3fe8f9 100755 --- a/src/lposix.c +++ b/src/lposix.c @@ -42,16 +42,17 @@ # define EFLAGS_DEFAULT 0 #endif -const char posix_typename[] 
= REX_LIBNAME"_regex"; -const char *posix_handle = posix_typename; - #define CODE_NOMATCH REG_NOMATCH #define IS_MATCH(res) ((res) == 0) #define SUB_BEG(ud,n) ud->match[n].rm_so #define SUB_END(ud,n) ud->match[n].rm_eo #define SUB_LEN(ud,n) (SUB_END(ud,n) - SUB_BEG(ud,n)) #define SUB_VALID(ud,n) (SUB_BEG(ud,n) >= 0) -#define NSUB(ud) ((int)ud->r.re_nsub) +#ifdef REX_NSUB_BASE1 +# define NSUB(ud) ((int)ud->r.re_nsub - 1) +#else +# define NSUB(ud) ((int)ud->r.re_nsub) +#endif #define PUSH_SUB(L,ud,text,n) \ lua_pushlstring (L, (text) + SUB_BEG(ud,n), SUB_LEN(ud,n)) @@ -63,10 +64,10 @@ const char *posix_handle = posix_typename; #define PUSH_END(L,ud,offs,n) lua_pushinteger(L, (offs) + SUB_END(ud,n)) #define PUSH_OFFSETS(L,ud,offs,n) (PUSH_START(L,ud,offs,n), PUSH_END(L,ud,offs,n)) - -/* Data Types - ****************************************************************************** - */ +#define BASE(st) (st) +#define PULL(st,from) ((void)st) +#define OPTLOCALE(trg,L,pos) ((void)trg) +#define DO_NAMED_SUBPATTERNS(L,ud,text) ((void)L) typedef struct { regex_t r; @@ -74,22 +75,11 @@ typedef struct { int freed; } TPosix; -typedef struct { /* regcomp arguments */ - const char * pattern; - size_t patlen; - int cflags; -} TArgComp; - -typedef struct { /* regexec arguments */ - TPosix * ud; - const char * text; - size_t textlen; - int startoffset; - int eflags; - int funcpos; - int maxmatch; - int reptype; /* used with gsub */ -} TArgExec; +#define TUserdata TPosix +#include "algo.h" + +const char posix_typename[] = REX_LIBNAME"_regex"; +const char *posix_handle = posix_typename; /* Functions ****************************************************************************** @@ -99,56 +89,7 @@ static TPosix* check_ud (lua_State *L, int stackpos) { return (TPosix *)luaL_checkudata (L, stackpos, posix_handle); } -static void checkarg_new (lua_State *L, TArgComp *argC) { - argC->pattern = luaL_checklstring (L, 1, &argC->patlen); - argC->cflags = luaL_optint (L, 2, CFLAGS_DEFAULT); -} - 
-/* function gsub (s, patt, f, [n], [cf], [ef]) */ -static void checkarg_gsub (lua_State *L, TArgComp *argC, TArgExec *argE) { - argE->text = luaL_checklstring (L, 1, &argE->textlen); - argC->pattern = luaL_checklstring (L, 2, &argC->patlen); - lua_tostring (L, 3); /* converts number (if any) to string */ - argE->reptype = lua_type (L, 3); - if (argE->reptype != LUA_TSTRING && argE->reptype != LUA_TTABLE && - argE->reptype != LUA_TFUNCTION) { - luaL_argerror (L, 3, "must be string, table or function"); - } - argE->funcpos = 3; - argE->maxmatch = OptLimit (L, 4); - argC->cflags = luaL_optint (L, 5, CFLAGS_DEFAULT); - argE->eflags = luaL_optint (L, 6, EFLAGS_DEFAULT); -} - -/* method r:tfind (s, [st], [ef]) */ -/* method r:exec (s, [st], [ef]) */ -static void checkarg_tfind (lua_State *L, TArgExec *argE) { - argE->ud = check_ud (L, 1); - argE->text = luaL_checklstring (L, 2, &argE->textlen); - argE->startoffset = get_startoffset (L, 3, argE->textlen); - argE->eflags = luaL_optint (L, 4, EFLAGS_DEFAULT); -} - -/* function find (s, patt, [st], [cf], [ef]) */ -/* function match (s, patt, [st], [cf], [ef]) */ -static void checkarg_find_f (lua_State *L, TArgComp *argC, TArgExec *argE) { - argE->text = luaL_checklstring (L, 1, &argE->textlen); - argC->pattern = luaL_checklstring (L, 2, &argC->patlen); - argE->startoffset = get_startoffset (L, 3, argE->textlen); - argC->cflags = luaL_optint (L, 4, CFLAGS_DEFAULT); - argE->eflags = luaL_optint (L, 5, EFLAGS_DEFAULT); -} - -/* function gmatch (s, patt, [cf], [ef]) */ -/* function split (s, patt, [cf], [ef]) */ -static void checkarg_gmatch_split (lua_State *L, TArgComp *argC, TArgExec *argE) { - argE->text = luaL_checklstring (L, 1, &argE->textlen); - argC->pattern = luaL_checklstring (L, 2, &argC->patlen); - argC->cflags = luaL_optint (L, 3, CFLAGS_DEFAULT); - argE->eflags = luaL_optint (L, 4, EFLAGS_DEFAULT); -} - -int generate_error (lua_State *L, const TPosix *ud, int errcode) { +static int generate_error (lua_State *L, 
const TPosix *ud, int errcode) { char errbuf[80]; regerror (errcode, &ud->r, errbuf, sizeof (errbuf)); return luaL_error (L, "%s", errbuf); @@ -180,358 +121,85 @@ static int compile_regex (lua_State *L, const TArgComp *argC, TPosix **pud) { return 1; } -static void push_substrings (lua_State *L, TPosix *ud, const char *text) { - int i; - CheckStack (L, NSUB(ud)); - for (i = 1; i <= NSUB(ud); i++) { - PUSH_SUB_OR_FALSE (L, ud, text, i); - } -} - -static void push_substring_table (lua_State *L, TPosix *ud, const char *text) { - int i; - lua_newtable (L); - for (i = 1; i <= NSUB(ud); i++) { - PUSH_SUB_OR_FALSE (L, ud, text, i); - lua_rawseti (L, -2, i); - } -} - -static void push_offset_table (lua_State *L, TPosix *ud, int startoffset) { - int i, j; - - lua_newtable (L); - for (i=1, j=1; i <= NSUB(ud); i++) { - if (SUB_VALID (ud,i)) { - PUSH_START (L, ud, startoffset, i); - lua_rawseti (L, -2, j++); - PUSH_END (L, ud, startoffset, i); - lua_rawseti (L, -2, j++); - } - else { - lua_pushboolean (L, 0); - lua_rawseti (L, -2, j++); - lua_pushboolean (L, 0); - lua_rawseti (L, -2, j++); - } - } -} - -static void CheckStartEnd (TArgExec *argE) { #ifdef REX_POSIX_EXT +static void CheckStartEnd (TArgExec *argE, TPosix *ud) { if (argE->eflags & REG_STARTEND) { - SUB_BEG(argE->ud,0) = argE->startoffset; - SUB_END(argE->ud,0) = argE->textlen; + ud->match[0].rm_so = argE->startoffset; + ud->match[0].rm_eo = argE->textlen; argE->startoffset = 0; } else argE->text += argE->startoffset; +} +#endif + +static int tfind_exec (TPosix *ud, TArgExec *argE) { +#ifdef REX_POSIX_EXT + CheckStartEnd (argE, ud); #else argE->text += argE->startoffset; #endif + return regexec (&ud->r, argE->text, NSUB(ud) + 1, ud->match, argE->eflags); } -static int generic_tfind (lua_State *L, int tfind) { - int res; - TArgExec argE; - TPosix *ud; - - checkarg_tfind (L, &argE); - CheckStartEnd (&argE); - ud = argE.ud; /* avoid too many redirections */ - - /* execute the search */ - res = regexec (&ud->r, 
argE.text, NSUB(ud) + 1, ud->match, argE.eflags); - if (IS_MATCH (res)) { - PUSH_OFFSETS (L, ud, argE.startoffset, 0); - if (tfind) - push_substring_table (L, ud, argE.text); - else - push_offset_table (L, ud, argE.startoffset); - return 3; - } - else if (res == CODE_NOMATCH) - return lua_pushnil (L), 1; - else - return generate_error (L, ud, res); -} - -static int Posix_tfind (lua_State *L) { - return generic_tfind (L, 1); -} - -static int Posix_exec (lua_State *L) { - return generic_tfind (L, 0); -} - -static int gmatch_iter (lua_State *L) { - int res; - size_t textlen; - int incr; - TPosix *ud = (TPosix*) lua_touserdata (L, lua_upvalueindex (1)); - const char *text = lua_tolstring (L, lua_upvalueindex (2), &textlen); - int eflags = lua_tointeger (L, lua_upvalueindex (3)); - int startoffset = lua_tointeger (L, lua_upvalueindex (4)); - - if (startoffset > (int)textlen) - return 0; +static int gmatch_exec (TUserdata *ud, TArgExec *argE) { + if (argE->startoffset > 0) + argE->eflags |= REG_NOTBOL; #ifdef REX_POSIX_EXT - if (eflags & REG_STARTEND) { + if (argE->eflags & REG_STARTEND) { SUB_BEG(ud,0) = 0; - SUB_END(ud,0) = textlen - startoffset; + SUB_END(ud,0) = argE->textlen - argE->startoffset; } #endif - /* execute the search */ - text += startoffset; - res = regexec (&ud->r, text, NSUB(ud) + 1, ud->match, eflags); - if (IS_MATCH (res)) { - /* push either captures or entire match */ - if (NSUB(ud)) - push_substrings (L, ud, text); - else - PUSH_SUB (L, ud, text, 0); - incr = (SUB_LEN(ud,0) == 0) ? 1 : 0; /* prevent endless loop */ - PUSH_END (L, ud, startoffset + incr, 0); /* update start offset */ - lua_replace (L, lua_upvalueindex (4)); - return NSUB(ud) ? 
NSUB(ud) : 1; - } - else if (res == CODE_NOMATCH) - return 0; - else - return generate_error (L, ud, res); + argE->text += argE->startoffset; + return regexec (&ud->r, argE->text, NSUB(ud) + 1, ud->match, argE->eflags); } -static int split_iter (lua_State *L) { - int res; - size_t textlen; - int newoffset; - TPosix *ud = (TPosix*) lua_touserdata (L, lua_upvalueindex (1)); - const char *text = lua_tolstring (L, lua_upvalueindex (2), &textlen); - int eflags = lua_tointeger (L, lua_upvalueindex (3)); - int startoffset = lua_tointeger (L, lua_upvalueindex (4)); - - if (startoffset >= (int)textlen) - return 0; - for (newoffset = startoffset; newoffset < (int)textlen; ++newoffset) { +static void gmatch_pushsubject (lua_State *L, TArgExec *argE) { #ifdef REX_POSIX_EXT - if (eflags & REG_STARTEND) { - SUB_BEG(ud,0) = 0; - SUB_END(ud,0) = textlen - newoffset; - } + if (argE->eflags & REG_STARTEND) + lua_pushlstring (L, argE->text, argE->textlen); + else + lua_pushlstring (L, argE->text, strlen (argE->text)); +#else + lua_pushlstring (L, argE->text, strlen (argE->text)); #endif - - /* execute the search */ - res = regexec (&ud->r, text + newoffset, NSUB(ud) + 1, ud->match, eflags); - if (IS_MATCH (res)) { - if (SUB_LEN(ud,0)) { - PUSH_END (L, ud, newoffset, 0); /* update start offset */ - lua_replace (L, lua_upvalueindex (4)); - /* push text preceding the match */ - lua_pushlstring (L, text + startoffset, SUB_BEG(ud,0) + newoffset - startoffset); - /* push either captures or entire match */ - if (NSUB(ud)) - push_substrings (L, ud, text + newoffset); - else - PUSH_SUB (L, ud, text + newoffset, 0); - return NSUB(ud) ? 
NSUB(ud)+1 : 2; - } - } - else if (res == CODE_NOMATCH) - break; - else - return generate_error (L, ud, res); - } - lua_pushinteger (L, textlen); /* mark as last iteration */ - lua_replace (L, lua_upvalueindex (4)); /* update start offset */ - lua_pushlstring (L, text + startoffset, textlen - startoffset); - return 1; } -static int generic_gmatch (lua_State *L, lua_CFunction iter) -{ - TArgComp argC; - TArgExec argE; - checkarg_gmatch_split (L, &argC, &argE); - compile_regex (L, &argC, &argE.ud); /* 1-st upvalue: ud */ +static int findmatch_exec (TPosix *ud, TArgExec *argE) { #ifdef REX_POSIX_EXT - if (argE.eflags & REG_STARTEND) - lua_pushlstring (L, argE.text, argE.textlen); /* 2-nd upvalue: s */ - else - lua_pushlstring (L, argE.text, strlen (argE.text)); + CheckStartEnd (argE, ud); #else - lua_pushlstring (L, argE.text, strlen (argE.text)); + argE->text += argE->startoffset; #endif - lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */ - lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */ - lua_pushcclosure (L, iter, 4); - return 1; -} - -static int Posix_gmatch (lua_State *L) { - return generic_gmatch (L, gmatch_iter); + return regexec (&ud->r, argE->text, NSUB(ud) + 1, ud->match, argE->eflags); } -static int Posix_split (lua_State *L) { - return generic_gmatch (L, split_iter); -} - -static int Posix_new (lua_State *L) { - TArgComp argC; - checkarg_new (L, &argC); - return compile_regex (L, &argC, NULL); -} - -static int generic_find (lua_State *L, int find) { - int res; - TPosix *ud; - TArgComp argC; - TArgExec argE; - - checkarg_find_f (L, &argC, &argE); - compile_regex (L, &argC, &argE.ud); - CheckStartEnd (&argE); - ud = argE.ud; - - res = regexec (&ud->r, argE.text, NSUB(ud) + 1, ud->match, argE.eflags); - if (IS_MATCH (res)) { - if (find) - PUSH_OFFSETS (L, ud, argE.startoffset, 0); - if (NSUB(ud)) /* push captures */ - push_substrings (L, ud, argE.text); - else if (!find) { /* push entire match */ - PUSH_SUB (L, ud, argE.text, 0); - return 1; - 
} - return find ? NSUB(ud) + 2 : NSUB(ud); +static int gsub_exec (TPosix *ud, TArgExec *argE, int st) { +#ifdef REX_POSIX_EXT + if(argE->eflags & REG_STARTEND) { + SUB_BEG(ud,0) = 0; + SUB_END(ud,0) = argE->textlen - st; } - else if (res == CODE_NOMATCH) - return lua_pushnil (L), 1; - else - return generate_error (L, ud, res); -} - -static int Posix_find (lua_State *L) { - return generic_find (L, 1); -} - -static int Posix_match (lua_State *L) { - return generic_find (L, 0); +#endif + if (st > 0) + argE->eflags |= REG_NOTBOL; + return regexec (&ud->r, argE->text+st, NSUB(ud)+1, ud->match, argE->eflags); } -static int Posix_gsub (lua_State *L) { - TPosix *ud; - TArgComp argC; - TArgExec argE; - int reps = 0, st = 0; - TBuffer BufOut, BufRep; - TFreeList freelist; - /*--------------------------------------------------------------------------*/ - checkarg_gsub (L, &argC, &argE); - compile_regex (L, &argC, &ud); - freelist_init (&freelist); - /*--------------------------------------------------------------------------*/ - if (argE.reptype == LUA_TSTRING) { - buffer_init (&BufRep, 256, L, &freelist); - bufferZ_putrepstring (&BufRep, argE.funcpos, NSUB(ud)); - } - else if (argE.reptype == LUA_TFUNCTION) - lua_pushliteral (L, "break"); - /*--------------------------------------------------------------------------*/ - buffer_init (&BufOut, 1024, L, &freelist); - while ((argE.maxmatch < 0 || reps < argE.maxmatch) && st <= (int)argE.textlen) { - int from, to, res; +static int split_exec (TPosix *ud, TArgExec *argE, int offset) { #ifdef REX_POSIX_EXT - if(argE.eflags & REG_STARTEND) { - SUB_BEG(ud,0) = 0; - SUB_END(ud,0) = argE.textlen - st; - } -#endif - res = regexec (&ud->r, argE.text+st, NSUB(ud)+1, ud->match, argE.eflags); - if (res == CODE_NOMATCH) - break; - else if (!IS_MATCH (res)) { - freelist_free (&freelist); - return generate_error (L, ud, res); - } - ++reps; - from = st + SUB_BEG(ud,0); - to = st + SUB_END(ud,0); - if (from > st) - buffer_addlstring (&BufOut, 
argE.text + st, from - st); - /*------------------------------------------------------------------------*/ - if (argE.reptype == LUA_TSTRING) { - size_t iter = 0, num; - const char *str; - while (bufferZ_next (&BufRep, &iter, &num, &str)) { - if (str == NULL) { /* got number in variable 'num' */ - if (num == 0) /* %0 : add the entire match */ - buffer_addlstring (&BufOut, argE.text + from, to - from); - else { /* add captured substring */ - if (SUB_VALID (ud,num)) - buffer_addlstring (&BufOut, argE.text + st + SUB_BEG(ud,num), - SUB_LEN(ud,num)); - } - } - else buffer_addlstring (&BufOut, str, num); - } - } - /*------------------------------------------------------------------------*/ - else if (argE.reptype == LUA_TTABLE) { - if (NSUB(ud) > 0) - PUSH_SUB_OR_FALSE (L, ud, argE.text + st, 1); - else - lua_pushlstring (L, argE.text + from, to - from); - lua_gettable (L, argE.funcpos); - } - /*------------------------------------------------------------------------*/ - else if (argE.reptype == LUA_TFUNCTION) { - int narg; - lua_pushvalue (L, argE.funcpos); - if (NSUB(ud) > 0) { - push_substrings (L, ud, argE.text + st); - narg = NSUB(ud); - } - else { - lua_pushlstring (L, argE.text + from, to - from); - narg = 1; - } - if (0 != lua_pcall (L, narg, 2, 0)) { - freelist_free (&freelist); - return lua_error (L); /* re-raise the error */ - } - } - /*------------------------------------------------------------------------*/ - if (argE.reptype != LUA_TSTRING) { - int pos = (argE.reptype == LUA_TFUNCTION) ? 
-2 : -1; - if (lua_tostring (L, pos)) - buffer_addvalue (&BufOut, pos); - else if (!lua_toboolean (L, pos)) - buffer_addlstring (&BufOut, argE.text + from, to - from); - else { - freelist_free (&freelist); - luaL_error (L, "invalid replacement value (a %s)", luaL_typename (L, pos)); - } - if (argE.reptype == LUA_TFUNCTION && lua_equal (L, -1, -3)) - argE.maxmatch = 0; /* signal break from the loop */ - lua_pop (L, -pos); - } - /*------------------------------------------------------------------------*/ - if (from < to) - st = to; - else if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */ - buffer_addlstring (&BufOut, argE.text + st, 1); - ++st; - } - else break; + if (argE->eflags & REG_STARTEND) { + SUB_BEG(ud,0) = 0; + SUB_END(ud,0) = argE->textlen - offset; } - /*--------------------------------------------------------------------------*/ - buffer_addlstring (&BufOut, argE.text + st, argE.textlen - st); - buffer_pushresult (&BufOut); - lua_pushinteger (L, reps); - freelist_free (&freelist); - return 2; +#endif + if (offset > 0) + argE->eflags |= REG_NOTBOL; + + return regexec (&ud->r, argE->text + offset, NSUB(ud) + 1, ud->match, argE->eflags); } static int Posix_gc (lua_State *L) { @@ -601,20 +269,20 @@ static int Posix_get_flags (lua_State *L) { } static const luaL_reg posixmeta[] = { - { "exec", Posix_exec }, - { "tfind", Posix_tfind }, /* old match */ + { "exec", ud_exec }, + { "tfind", ud_tfind }, /* old match */ { "__gc", Posix_gc }, { "__tostring", Posix_tostring }, { NULL, NULL} }; static const luaL_reg rexlib[] = { - { "match", Posix_match }, - { "find", Posix_find }, - { "gmatch", Posix_gmatch }, - { "gsub", Posix_gsub }, - { "split", Posix_split }, - { "new", Posix_new }, + { "match", match }, + { "find", find }, + { "gmatch", gmatch }, + { "gsub", gsub }, + { "split", split }, + { "new", ud_new }, { "flags", Posix_get_flags }, { "plainfind", plainfind_func }, { NULL, NULL } diff --git a/src/rex_pcre.mak b/src/rex_pcre.mak index 
392eb09..83ae19a 100755 --- a/src/rex_pcre.mak +++ b/src/rex_pcre.mak @@ -20,7 +20,7 @@ TRG = rex_pcre # =========================================================================== # === END OF USER SETTINGS === -V = 2.1 +V = 2.2 DEFS = -DREX_OPENLIB=luaopen_$(TRG) -DREX_LIBNAME=\"$(TRG)\" CFLAGS = $(MYCFLAGS) $(DEFS) diff --git a/src/rex_posix.mak b/src/rex_posix.mak index 12fc3e9..a3179bf 100755 --- a/src/rex_posix.mak +++ b/src/rex_posix.mak @@ -32,7 +32,7 @@ TRG = rex_posix # =========================================================================== # === END OF USER SETTINGS === -V = 2.1 +V = 2.2 DEFS = -DREX_OPENLIB=luaopen_$(TRG) -DREX_LIBNAME=\"$(TRG)\" CFLAGS = $(MYCFLAGS) $(DEFS) diff --git a/src/scite.properties b/src/scite.properties index cf58e1a..35287aa 100755 --- a/src/scite.properties +++ b/src/scite.properties @@ -1,12 +1,21 @@ eol.mode=LF cc=bcc32 -dir_lua=\Progr\lib\lua\lua_5.1 -dir_pcre=\Progr\lib\pcre\pcre_7.0 -dir_posix=\Progr\lib\henry_spencer +dir_lib=\Progr\lib +dir_lua=$(dir_lib)\lua\lua_5.1 +dir_pcre=$(dir_lib)\pcre\pcre_7.0 +dir_posix=$(dir_lib)\henry_spencer +dir_tre=$(dir_lib) + +command.compile.lposix.c=cl.bat -A -I$(dir_lua) -I$(dir_posix) -c "$(FileNameExt)" + +command.compile.ltre.c=$(cc) -A -I$(dir_lua);$(dir_tre) -c "$(FileNameExt)" command.compile.*.c=$(cc) -A -I$(dir_lua);$(dir_pcre);$(dir_posix) -c \ "$(FileNameExt)" +command.compile.*.cpp=$(cc) -A -I$(dir_lua);$(dir_pcre);$(dir_posix) -c \ + "$(FileNameExt)" + command.build.*.c=lrexlib.bat command.build.*.h=lrexlib.bat diff --git a/test/common_sets.lua b/test/common_sets.lua index 9b55db2..8104f60 100755 --- a/test/common_sets.lua +++ b/test/common_sets.lua @@ -32,6 +32,8 @@ local function set_f_gmatch (lib, flg) --{ subj patt results } { {("abcd"):rep(3), "(.)b.(d)"}, {{"a","d"},{"a","d"},{"a","d"}} }, { {"abcd", ".*" }, {{"abcd",N},{"",N} } },--zero-length match + { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj + { {"abc", "^." 
}, {{"a",N}} },--anchored pattern } end @@ -58,6 +60,8 @@ local function set_f_split (lib, flg) { {"a,b,", ","}, {{"a",",",N}, {"b",",",N}, } }, { {"a,,b", ","}, {{"a",",",N}, {"",",",N}, {"b",N,N}} }, { {"ab<78>c", "<(.)(.)>"}, {{"ab","7","8"}, {"c",N,N}, } }, + { {"a,\0,c", ","}, {{"a",",",N},{"\0",",",N},{"c",N,N}, } },--nuls in subj + { {"abc", "^."}, {{"", "a",N}, {"bc",N,N}, } },--anchored pattern } end @@ -141,18 +145,20 @@ local function set_f_gsub1 (lib, flg) return { Name = "Function gsub, set1", Func = get_gsub (lib), - --{ s, p, f, n, res1, res2 }, - { {subj, pat, "", 0}, {subj, 0} }, -- test "n" + empty_replace - { {subj, pat, "", -1}, {subj, 0} }, -- test "n" + empty_replace - { {subj, pat, "", 1}, {"cdef", 1} }, - { {subj, pat, "", 2}, {"cd", 2} }, - { {subj, pat, "", 3}, {"cd", 2} }, - { {subj, pat, "" }, {"cd", 2} }, - { {subj, pat, "#", 0}, {subj, 0} }, -- test "n" + non-empty_replace - { {subj, pat, "#", 1}, {"#cdef", 1} }, - { {subj, pat, "#", 2}, {"#cd#", 2} }, - { {subj, pat, "#", 3}, {"#cd#", 2} }, - { {subj, pat, "#" }, {"#cd#", 2} }, + --{ s, p, f, n, res1, res2, res3 }, + { {subj, pat, "", 0}, {subj, 0, 0} }, -- test "n" + empty_replace + { {subj, pat, "", -1}, {subj, 0, 0} }, -- test "n" + empty_replace + { {subj, pat, "", 1}, {"cdef", 1, 1} }, + { {subj, pat, "", 2}, {"cd", 2, 2} }, + { {subj, pat, "", 3}, {"cd", 2, 2} }, + { {subj, pat, "" }, {"cd", 2, 2} }, + { {subj, pat, "#", 0}, {subj, 0, 0} }, -- test "n" + non-empty_replace + { {subj, pat, "#", 1}, {"#cdef", 1, 1} }, + { {subj, pat, "#", 2}, {"#cd#", 2, 2} }, + { {subj, pat, "#", 3}, {"#cd#", 2, 2} }, + { {subj, pat, "#" }, {"#cd#", 2, 2} }, + { {"a\0c", ".", "#" }, {"###", 3, 3} }, -- subj contains nuls + { {"abc", "^.", "#" }, {"#bc", 1, 1} }, -- anchored pattern } end @@ -161,16 +167,16 @@ local function set_f_gsub2 (lib, flg) return { Name = "Function gsub, set2", Func = get_gsub (lib), - --{ s, p, f, n, res1, res2 }, - { {subj, pat, "<%1>" }, {"<a>b<c>", 2} }, -- test 
non-escaped chars in f - { {subj, pat, "%<%1%>" }, {"<a>b<c>", 2} }, -- test escaped chars in f - { {subj, pat, "" }, {"b", 2} }, -- test empty replace - { {subj, pat, "1" }, {"1b1", 2} }, -- test odd and even %'s in f - { {subj, pat, "%1" }, {"abc", 2} }, - { {subj, pat, "%%1" }, {"%1b%1", 2} }, - { {subj, pat, "%%%1" }, {"%ab%c", 2} }, - { {subj, pat, "%%%%1" }, {"%%1b%%1", 2} }, - { {subj, pat, "%%%%%1" }, {"%%ab%%c", 2} }, + --{ s, p, f, n, res1, res2, res3 }, + { {subj, pat, "<%1>" }, {"<a>b<c>", 2, 2} }, -- test non-escaped chars in f + { {subj, pat, "%<%1%>" }, {"<a>b<c>", 2, 2} }, -- test escaped chars in f + { {subj, pat, "" }, {"b", 2, 2} }, -- test empty replace + { {subj, pat, "1" }, {"1b1", 2, 2} }, -- test odd and even %'s in f + { {subj, pat, "%1" }, {"abc", 2, 2} }, + { {subj, pat, "%%1" }, {"%1b%1", 2, 2} }, + { {subj, pat, "%%%1" }, {"%ab%c", 2, 2} }, + { {subj, pat, "%%%%1" }, {"%%1b%%1", 2, 2} }, + { {subj, pat, "%%%%%1" }, {"%%ab%%c", 2, 2} }, } end @@ -178,11 +184,11 @@ local function set_f_gsub3 (lib, flg) return { Name = "Function gsub, set3", Func = get_gsub (lib), - --{ s, p, f, n, res1, res2 }, - { {"abc", "a", "%0" }, {"abc", 1} }, -- test (in)valid capture index - { {"abc", "a", "%1" }, {"abc", 1} }, - { {"abc", "[ac]", "%1" }, {"abc", 2} }, - { {"abc", "(a)", "%1" }, {"abc", 1} }, + --{ s, p, f, n, res1,res2,res3 }, + { {"abc", "a", "%0" }, {"abc", 1, 1} }, -- test (in)valid capture index + { {"abc", "a", "%1" }, {"abc", 1, 1} }, + { {"abc", "[ac]", "%1" }, {"abc", 2, 2} }, + { {"abc", "(a)", "%1" }, {"abc", 1, 1} }, { {"abc", "(a)", "%2" }, "invalid capture index" }, } end @@ -191,15 +197,15 @@ local function set_f_gsub4 (lib, flg) return { Name = "Function gsub, set4", Func = get_gsub (lib), - --{ s, p, f, n, res1, res2 }, - { {"a2c3", ".", "#" }, {"####", 4} }, -- test . 
- { {"a2c3", ".+", "#" }, {"#", 1} }, -- test .+ - { {"a2c3", ".*", "#" }, {"##", 2} }, -- test .* - { {"/* */ */", "\\/\\*(.*)\\*\\/", "#" }, {"#", 1} }, - { {"a2c3", "[0-9]", "#" }, {"a#c#", 2} }, -- test %d - { {"a2c3", "[^0-9]", "#" }, {"#2#3", 2} }, -- test %D - { {"a \t\nb", "[ \t\n]", "#" }, {"a###b", 3} }, -- test %s - { {"a \t\nb", "[^ \t\n]", "#" }, {"# \t\n#", 2} }, -- test %S + --{ s, p, f, n, res1, res2, res3 }, + { {"a2c3", ".", "#" }, {"####", 4, 4} }, -- test . + { {"a2c3", ".+", "#" }, {"#", 1, 1} }, -- test .+ + { {"a2c3", ".*", "#" }, {"##", 2, 2} }, -- test .* + { {"/* */ */", "\\/\\*(.*)\\*\\/", "#" }, {"#", 1, 1} }, + { {"a2c3", "[0-9]", "#" }, {"a#c#", 2, 2} }, -- test %d + { {"a2c3", "[^0-9]", "#" }, {"#2#3", 2, 2} }, -- test %D + { {"a \t\nb", "[ \t\n]", "#" }, {"a###b", 3, 3} }, -- test %s + { {"a \t\nb", "[^ \t\n]", "#" }, {"# \t\n#", 2, 2} }, -- test %S } end @@ -214,17 +220,17 @@ local function set_f_gsub5 (lib, flg) return { Name = "Function gsub, set5", Func = get_gsub (lib), - --{ s, p, f, n, res1, res2 }, - { {subj, "a(.)c(.)", frep1 }, {subj, 1} }, - { {subj, "a(.)c(.)", frep2 }, {"#", 1} }, - { {subj, "a(.)c(.)", frep3 }, {"2,3", 1} }, - { {subj, "a.c.", frep3 }, {subj, 1} }, - { {subj, "z*", frep1 }, {subj, 5} }, - { {subj, "z*", frep2 }, {"#a#2#c#3#", 5} }, - { {subj, "z*", frep3 }, {subj, 5} }, + --{ s, p, f, n, res1, res2, res3 }, + { {subj, "a(.)c(.)", frep1 }, {subj, 1, 0} }, + { {subj, "a(.)c(.)", frep2 }, {"#", 1, 1} }, + { {subj, "a(.)c(.)", frep3 }, {"2,3", 1, 1} }, + { {subj, "a.c.", frep3 }, {subj, 1, 1} }, + { {subj, "z*", frep1 }, {subj, 5, 0} }, + { {subj, "z*", frep2 }, {"#a#2#c#3#", 5, 5} }, + { {subj, "z*", frep3 }, {subj, 5, 5} }, { {subj, subj, frep4 }, "invalid return type" }, - { {"abc",".", frep5 }, {"777", 3} }, - { {"abc",".", frep6 }, {"7bc", 1} }, + { {"abc",".", frep5 }, {"777", 3, 3} }, + { {"abc",".", frep6 }, {"777", 3, 3} }, } end @@ -234,13 +240,44 @@ local function set_f_gsub6 (lib, flg) return { 
Name = "Function gsub, set6", Func = get_gsub (lib), - --{ s, p, f, n, res1, res2 }, - { {subj, "a(.)c(.)", tab1 }, {subj, 1} }, - { {subj, "a(.)c(.)", tab2 }, {"56", 1} }, + --{ s, p, f, n, res1,res2,res3 }, + { {subj, "a(.)c(.)", tab1 }, {subj, 1, 0} }, + { {subj, "a(.)c(.)", tab2 }, {"56", 1, 1} }, { {subj, "a(.)c(.)", tab3 }, "invalid replacement type" }, - { {subj, "a.c.", tab1 }, {subj, 1} }, - { {subj, "a.c.", tab2 }, {subj, 1} }, - { {subj, "a.c.", tab3 }, {subj, 1} }, + { {subj, "a.c.", tab1 }, {subj, 1, 0} }, + { {subj, "a.c.", tab2 }, {subj, 1, 0} }, + { {subj, "a.c.", tab3 }, {subj, 1, 0} }, + } +end + +local function set_f_gsub8 (lib, flg) + local subj, patt, repl = "abcdef", "..", "*" + return { + Name = "Function gsub, set8", + Func = get_gsub (lib), + --{ s, p, f, n, res1, res2, res3 }, + { {subj, patt, repl, function() end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return nil end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return false end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return true end }, {"***", 3, 3} }, + { {subj, patt, repl, function() return {} end }, {"***", 3, 3} }, + { {subj, patt, repl, function() return "#" end }, {"###", 3, 3} }, + { {subj, patt, repl, function() return 57 end }, {"575757", 3, 3} }, + { {subj, patt, repl, function (from) return from end }, {"135", 3, 3} }, + { {subj, patt, repl, function (from, to) return to end }, {"246", 3, 3} }, + { {subj, patt, repl, function (from,to,rep) return rep end }, + {"***", 3, 3} }, + { {subj, patt, repl, function (from, to, rep) return rep..to..from end }, + {"*21*43*65", 3, 3} }, + { {subj, patt, repl, function() return nil end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return nil, nil end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return nil, false end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return nil, true end }, {"ab**", 3, 2} }, + { {subj, patt, repl, function() return true, true end }, 
{"***", 3, 3} }, + { {subj, patt, repl, function() return nil, 0 end }, {"abcdef", 1, 0} }, + { {subj, patt, repl, function() return true, 0 end }, {"*cdef", 1, 1} }, + { {subj, patt, repl, function() return nil, 1 end }, {"ab*ef", 2, 1} }, + { {subj, patt, repl, function() return true, 1 end }, {"**ef", 2, 2} }, } end @@ -259,6 +296,7 @@ return function (libname) set_f_gsub4 (lib), set_f_gsub5 (lib), set_f_gsub6 (lib), + set_f_gsub8 (lib), set_f_plainfind (lib), } end diff --git a/test/pcre_sets.lua b/test/pcre_sets.lua index 865a10f..1897398 100755 --- a/test/pcre_sets.lua +++ b/test/pcre_sets.lua @@ -107,8 +107,10 @@ return function (libname) set_f_find (lib, flags), set_m_exec (lib, flags), set_m_tfind (lib, flags), - set_named_subpatterns (lib, flags), } + if flags.MAJOR >= 4 then + table.insert (sets, set_named_subpatterns (lib, flags)) + end if flags.MAJOR >= 6 then table.insert (sets, set_m_dfa_exec (lib, flags)) end diff --git a/test/pcre_sets2.lua b/test/pcre_sets2.lua index 7616540..8bdb3a3 100755 --- a/test/pcre_sets2.lua +++ b/test/pcre_sets2.lua @@ -13,15 +13,16 @@ local function set_f_gsub4 (lib, flg) local set = { Name = "Function gsub, set4", Func = get_gsub (lib), - --{ s, p, f, n, res1, res2 }, - { {"/* */ */", "%/%*(.*)%*%/", "#" }, {"#", 1} }, - { {"a2c3", ".-", "#" }, {"#a#2#c#3#", 5} }, -- test .- - { {"/**/", "%/%*(.-)%*%/", "#" }, {"#", 1} }, - { {"/* */ */", "%/%*(.-)%*%/", "#" }, {"# */", 1} }, - { {"a2c3", "%d", "#" }, {"a#c#", 2} }, -- test %d - { {"a2c3", "%D", "#" }, {"#2#3", 2} }, -- test %D - { {"a \t\nb", "%s", "#" }, {"a###b", 3} }, -- test %s - { {"a \t\nb", "%S", "#" }, {"# \t\n#", 2} }, -- test %S + --{ s, p, f, n, res1, res2, res3 }, + { {"/* */ */", "%/%*(.*)%*%/", "#" }, {"#", 1, 1} }, + { {"a2c3", ".-", "#" }, {"#a#2#c#3#", 5, 5} }, -- test .- + { {"/**/", "%/%*(.-)%*%/", "#" }, {"#", 1, 1} }, + { {"/* */ */", "%/%*(.-)%*%/", "#" }, {"# */", 1, 1} }, + { {"a2c3", "%d", "#" }, {"a#c#", 2, 2} }, -- test %d + { {"a2c3", "%D", 
"#" }, {"#2#3", 2, 2} }, -- test %D + { {"a \t\nb", "%s", "#" }, {"a###b", 3, 3} }, -- test %s + { {"a \t\nb", "%S", "#" }, {"# \t\n#", 2, 2} }, -- test %S + { {"abcd", "\\b", "%1"}, {"abcd", 2, 2} }, } -- convert patterns: lua -> pcre for _, test in ipairs (set) do @@ -157,7 +158,7 @@ local function set_f_gsub7 (lib, flg) -- fill in reference results for _,v in ipairs(set) do local r0, r1, r2 = pcall (string.gsub, unpack (v[1])) - v[2] = r0 and { r1, r2 } or { r0, r1 } + v[2] = r0 and { r1, r2, r2 } or { r0, r1 } end -- convert patterns: lua -> pcre for _, test in ipairs (set) do @@ -169,9 +170,12 @@ end return function (libname) local lib = require (libname) local flags = lib.flags and lib.flags () - return { + local sets = { set_f_gsub4 (lib, flags), - set_f_gsub7 (lib, flags), } + if flags.MAJOR*100 + flags.MINOR > 405 then + table.insert (sets, set_f_gsub7 (lib, flags)) + end + return sets end |