summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Shmuel Zeigerman <solomuz0@gmail.com> 2012-02-12 17:20:40 +0200
committer Shmuel Zeigerman <solomuz0@gmail.com> 2012-02-12 17:20:40 +0200
commit 377b4edbbbc5aa4dbdaa5a0a95b4227f94eeb85c (patch)
tree b9f2a6ca83698be4d647ab03bf8dc852c7d8f2e1 /src
parent 6690dc14df6633589b5645599e4ba3140e82694b (diff)
download lrexlib-377b4edbbbc5aa4dbdaa5a0a95b4227f94eeb85c.tar.gz
1. TRE binding: add wide-character functions.
2. Test suite: fix and refactor.
Diffstat (limited to 'src')
-rw-r--r-- src/algo.h 16
-rw-r--r-- src/tre/Makefile 3
-rw-r--r-- src/tre/ltre.c 3
-rw-r--r-- src/tre/ltre_w.c 219
4 files changed, 234 insertions, 7 deletions
diff --git a/src/algo.h b/src/algo.h
index ccf0f0f..65db6c5 100644
--- a/src/algo.h
+++ b/src/algo.h
@@ -13,6 +13,10 @@ static int split_exec (TUserdata *ud, TArgExec *argE, int offset);
static int compile_regex (lua_State *L, const TArgComp *argC, TUserdata **pud);
static int generate_error (lua_State *L, const TUserdata *ud, int errcode);
+#ifndef ALG_CHARSIZE
+# define ALG_CHARSIZE 1
+#endif
+
#ifndef ALG_GETCARGS
# define ALG_GETCARGS(a,b,c)
#endif
@@ -72,11 +76,11 @@ static int get_startoffset(lua_State *L, int stackpos, size_t len) {
if(startoffset > 0)
startoffset--;
else if(startoffset < 0) {
- startoffset += len;
+ startoffset += len/ALG_CHARSIZE;
if(startoffset < 0)
startoffset = 0;
}
- return startoffset;
+ return startoffset*ALG_CHARSIZE;
}
@@ -447,10 +451,10 @@ static int gmatch_iter (lua_State *L) {
#ifdef ALG_USERETRY
SET_RETRY (retry, 1);
#else
- incr = 1;
+ incr = ALG_CHARSIZE;
#endif
}
- ALG_PUSHEND (L, ud, ALG_BASE(argE.startoffset)+incr, 0); /* update start offset */
+ lua_pushinteger(L, ALG_BASE(argE.startoffset) + incr + ALG_SUBEND(ud,0)); /* update start offset */
lua_replace (L, lua_upvalueindex (4));
#ifdef ALG_USERETRY
lua_pushinteger (L, retry);
@@ -501,9 +505,9 @@ static int split_iter (lua_State *L) {
res = split_exec (ud, &argE, newoffset);
if (ALG_ISMATCH (res)) {
- ALG_PUSHEND (L, ud, ALG_BASE(newoffset), 0); /* update start offset */
+ lua_pushinteger(L, ALG_BASE(newoffset) + ALG_SUBEND(ud,0)); /* update start offset */
lua_replace (L, lua_upvalueindex (4));
- lua_pushinteger (L, ALG_SUBLEN(ud,0) ? 0 : 1); /* update incr */
+ lua_pushinteger (L, ALG_SUBLEN(ud,0) ? 0 : ALG_CHARSIZE); /* update incr */
lua_replace (L, lua_upvalueindex (5));
/* push text preceding the match */
lua_pushlstring (L, argE.text + argE.startoffset,
diff --git a/src/tre/Makefile b/src/tre/Makefile
index 79458a4..2d3964e 100644
--- a/src/tre/Makefile
+++ b/src/tre/Makefile
@@ -25,7 +25,7 @@ REGNAME = tre
# ===========================================================================
# === END OF USER SETTINGS ===
-OBJ = ltre.o ../common.o
+OBJ = ltre.o ltre_w.o ../common.o
include ../common.mak
@@ -37,6 +37,7 @@ so_tre: $(TRG_SO)
# Dependencies
ltre.o: ltre.c ../common.h ../algo.h
+ltre_w.o: ltre_w.c ../common.h ../algo.h
../common.o: ../common.c ../common.h
# (End of Makefile)
diff --git a/src/tre/ltre.c b/src/tre/ltre.c
index 259646b..c612183 100644
--- a/src/tre/ltre.c
+++ b/src/tre/ltre.c
@@ -7,6 +7,7 @@
#include "lua.h"
#include "lauxlib.h"
#include "../common.h"
+extern void add_wide_lib (lua_State *L, int methods);
#include <tre/tre.h>
@@ -347,9 +348,11 @@ REX_API int REX_OPENLIB (lua_State *L)
lua_pushvalue(L, -1); /* mt.__index = mt */
lua_setfield(L, -2, "__index");
luaL_register (L, NULL, posixmeta);
+ add_wide_lib (L, 1);
/* register functions */
luaL_register (L, REX_LIBNAME, rexlib);
+ add_wide_lib (L, 0);
lua_pushliteral (L, REX_VERSION" (for TRE regexes)");
lua_setfield (L, -2, "_VERSION");
return 1;
diff --git a/src/tre/ltre_w.c b/src/tre/ltre_w.c
new file mode 100644
index 0000000..bd3ca1e
--- /dev/null
+++ b/src/tre/ltre_w.c
@@ -0,0 +1,219 @@
+/* ltre_w.c - Lua binding of TRE regular expressions library (wide-character functions) */
+/* See Copyright Notice in the file LICENSE */
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "lua.h"
+#include "lauxlib.h"
+#include "../common.h"
+
+#include <tre/tre.h>
+
+/* These 2 settings may be redefined from the command-line or the makefile.
+ * They should be kept in sync between themselves and with the target name.
+ */
+/* Default library name; used below to build REX_TYPENAME. */
+#ifndef REX_LIBNAME
+# define REX_LIBNAME "rex_tre"
+#endif
+/* Default name of the library's open function (not referenced directly in
+ * this file). */
+#ifndef REX_OPENLIB
+# define REX_OPENLIB luaopen_rex_tre
+#endif
+
+/* Library name with "_regex" appended by string-literal concatenation.
+ * Presumably the type/metatable name consumed by ../algo.h -- TODO confirm. */
+#define REX_TYPENAME REX_LIBNAME"_regex"
+
+/* Default compile and execution flags used when the caller supplies none. */
+#define ALG_CFLAGS_DFLT REG_EXTENDED
+#define ALG_EFLAGS_DFLT 0
+/* Size in bytes of one character of a subject or pattern string.
+ * Every buffer in this file is reinterpreted as `const wchar_t *` and every
+ * byte length is divided by this value to obtain a character count, so it
+ * must equal the platform's wchar_t size (2 on Windows, commonly 4 elsewhere)
+ * rather than a hard-coded 2; otherwise the regex engine is told the buffer
+ * holds more characters than it does and reads past its end.
+ * The cast to int keeps offset arithmetic signed: match offsets can be -1
+ * for unset sub-matches and ALG_SUBVALID relies on a `>= 0` comparison. */
+#define ALG_CHARSIZE ((int) sizeof (wchar_t))
+
+/* Result classification for the value returned by the exec functions. */
+#define ALG_NOMATCH(res) ((res) == REG_NOMATCH)
+#define ALG_ISMATCH(res) ((res) == 0)
+/* Begin/end/length of sub-match n: the rm_so/rm_eo values stored in
+ * ud->match[n], multiplied by ALG_CHARSIZE so they can be added to a
+ * `char *` subject pointer. */
+#define ALG_SUBBEG(ud,n) (ALG_CHARSIZE * ud->match[n].rm_so)
+#define ALG_SUBEND(ud,n) (ALG_CHARSIZE * ud->match[n].rm_eo)
+#define ALG_SUBLEN(ud,n) (ALG_SUBEND(ud,n) - ALG_SUBBEG(ud,n))
+/* A sub-match is treated as valid when its begin offset is non-negative. */
+#define ALG_SUBVALID(ud,n) (ALG_SUBBEG(ud,n) >= 0)
+/* Number of sub-expressions reported by the compiled regex. */
+#define ALG_NSUB(ud) ((int)ud->r.re_nsub)
+
+/* Push sub-match n of `text` as a Lua string. */
+#define ALG_PUSHSUB(L,ud,text,n) \
+ lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n))
+
+/* Push sub-match n as a Lua string, or boolean false when it is not valid. */
+#define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \
+ (ALG_SUBVALID(ud,n) ? ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0))
+
+/* Push start / end positions of sub-match n as integers. `offs` is added to
+ * the scaled offset and the sum is divided by ALG_CHARSIZE; the start
+ * position additionally gets +1. */
+#define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, ((offs) + ALG_SUBBEG(ud,n))/ALG_CHARSIZE + 1)
+#define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, ((offs) + ALG_SUBEND(ud,n))/ALG_CHARSIZE)
+#define ALG_PUSHOFFSETS(L,ud,offs,n) \
+ (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n))
+
+/* ALG_BASE passes the start offset through unchanged. */
+#define ALG_BASE(st) (st)
+/* Read the optional compile-flags argument, defaulting to ALG_CFLAGS_DFLT. */
+#define ALG_GETCFLAGS(L,pos) luaL_optint(L, pos, ALG_CFLAGS_DFLT)
+
+/* Userdata payload holding one compiled wide-character TRE regex. */
+typedef struct {
+ regex_t r; /* compiled pattern, filled in by tre_regwncomp in compile_regex */
+ regmatch_t * match; /* ALG_NSUB(ud)+1 match slots, allocated in compile_regex */
+ int freed; /* not referenced in this file; presumably maintained by cleanup code elsewhere -- TODO confirm */
+} TPosix;
+
+/* The shared code in ../algo.h refers to the userdata type as TUserdata. */
+#define TUserdata TPosix
+
+#include "../algo.h"
+
+/* Functions
+ ******************************************************************************
+ */
+
+/* Fill *argP from the Lua table at `stackpos`.
+ * Raises a Lua argument error ("table expected") when the value at
+ * `stackpos` is not a table. The table is pushed on top of the stack while
+ * its fields are read and popped again afterwards, so the stack is left
+ * balanced. get_int_field is defined outside this file; presumably it reads
+ * the named integer field from the table on top of the stack -- TODO confirm
+ * its behavior for missing or non-numeric fields. */
+static void checkarg_regaparams (lua_State *L, int stackpos, regaparams_t *argP) {
+ if (lua_type (L, stackpos) != LUA_TTABLE) /* allow for userdata? */
+ luaL_argerror (L, stackpos, "table expected");
+ lua_pushvalue (L, stackpos);
+ argP->cost_ins = get_int_field (L, "cost_ins");
+ argP->cost_del = get_int_field (L, "cost_del");
+ argP->cost_subst = get_int_field (L, "cost_subst");
+ argP->max_cost = get_int_field (L, "max_cost");
+ argP->max_ins = get_int_field (L, "max_ins");
+ argP->max_del = get_int_field (L, "max_del");
+ argP->max_subst = get_int_field (L, "max_subst");
+ argP->max_err = get_int_field (L, "max_err");
+ lua_pop (L, 1);
+}
+
+/* method r:watfind (s, params, [st], [ef]) */
+/* method r:waexec (s, params, [st], [ef]) */
+/* Collect the arguments shared by both methods:
+ *   1: regex userdata (validated by check_ud),
+ *   2: subject string and its length,
+ *   3: table of approximate-matching parameters,
+ *   4: start offset, converted by get_startoffset,
+ *   5: optional execution flags, defaulting to ALG_EFLAGS_DFLT.
+ * Arguments are checked in this order, which determines which Lua error is
+ * raised first when several are invalid. */
+static void checkarg_atfind (lua_State *L, TArgExec *argE, TPosix **ud,
+ regaparams_t *argP) {
+ *ud = check_ud (L);
+ argE->text = luaL_checklstring (L, 2, &argE->textlen);
+ checkarg_regaparams (L, 3, argP);
+ argE->startoffset = get_startoffset (L, 4, argE->textlen);
+ argE->eflags = luaL_optint (L, 5, ALG_EFLAGS_DFLT);
+}
+
+static int generate_error (lua_State *L, const TPosix *ud, int errcode) {
+ char errbuf[80];
+ tre_regerror (errcode, &ud->r, errbuf, sizeof (errbuf));
+ return luaL_error (L, "%s", errbuf);
+}
+
+/* Compile the wide-character pattern described by argC into a new userdata.
+ * The userdata is created zero-filled and left on top of the Lua stack; on a
+ * compile failure a Lua error carrying the TRE message is raised. On success
+ * the match array is allocated, the environment table is installed as the
+ * userdata's metatable, *pud (when non-NULL) receives the userdata pointer,
+ * and 1 is returned. */
+static int compile_regex (lua_State *L, const TArgComp *argC, TPosix **pud) {
+  TPosix *re = (TPosix *) lua_newuserdata (L, sizeof *re);
+  int status;
+
+  /* start with every member cleared */
+  memset (re, 0, sizeof *re);
+
+  /* the pattern length arrives in bytes; TRE expects a character count */
+  status = tre_regwncomp (&re->r, (const wchar_t *) argC->pattern,
+                          argC->patlen / ALG_CHARSIZE, argC->cflags);
+  if (status != 0)
+    return generate_error (L, re, status);
+
+  /* with REG_NOSUB only the whole-match slot is kept */
+  if ((argC->cflags & REG_NOSUB) != 0)
+    re->r.re_nsub = 0;
+  re->match = (regmatch_t *) Lmalloc (L, (ALG_NSUB(re) + 1) * sizeof (regmatch_t));
+
+  lua_pushvalue (L, LUA_ENVIRONINDEX);
+  lua_setmetatable (L, -2);
+
+  if (pud != NULL)
+    *pud = re;
+  return 1;
+}
+
+/* Shared implementation of the approximate-matching methods registered as
+ * "watfind" (tfind != 0) and "waexec" (tfind == 0).
+ *
+ * Returns nil when the start offset lies beyond the subject or when nothing
+ * matches. On a match it returns three values: the start and end positions
+ * of the whole match, and a table (sub-match strings for watfind, sub-match
+ * offsets for waexec) which also carries the fields "cost", "num_ins",
+ * "num_del" and "num_subst" from the approximate-match result. Any other
+ * TRE result code is raised as a Lua error.
+ *
+ * The subject holds wide characters, so the search must use the
+ * wide-character entry point tre_regawnexec with a length expressed in
+ * characters. The offset macros scale the reported offsets by ALG_CHARSIZE,
+ * which is only correct for offsets measured in characters. */
+static int generic_atfind (lua_State *L, int tfind) {
+  int res;
+  TArgExec argE;
+  TPosix *ud;
+  regaparams_t argP;
+  regamatch_t res_match;
+
+  checkarg_atfind (L, &argE, &ud, &argP);
+  if (argE.startoffset > (int)argE.textlen)
+    return lua_pushnil(L), 1;
+
+  argE.text += argE.startoffset;
+  res_match.nmatch = ALG_NSUB(ud) + 1;
+  res_match.pmatch = ud->match;
+
+  /* execute the search on the wide-character subject */
+  res = tre_regawnexec (&ud->r, (const wchar_t*)argE.text,
+                        (argE.textlen - argE.startoffset)/ALG_CHARSIZE,
+                        &res_match, argP, argE.eflags);
+  if (ALG_ISMATCH (res)) {
+    ALG_PUSHOFFSETS (L, ud, argE.startoffset, 0);
+    if (tfind)
+      push_substring_table (L, ud, argE.text);
+    else
+      push_offset_table (L, ud, argE.startoffset);
+    /* set values in the dictionary part of the table */
+    set_int_field (L, "cost", res_match.cost);
+    set_int_field (L, "num_ins", res_match.num_ins);
+    set_int_field (L, "num_del", res_match.num_del);
+    set_int_field (L, "num_subst", res_match.num_subst);
+    return 3;
+  }
+  else if (ALG_NOMATCH (res))
+    return lua_pushnil (L), 1;
+  else
+    return generate_error (L, ud, res);
+}
+
+/* Lua method registered as "watfind": approximate search whose third result
+ * is the sub-match string table. */
+static int Ltre_atfind (lua_State *L) {
+  const int want_substrings = 1;
+
+  return generic_atfind (L, want_substrings);
+}
+
+/* Lua method registered as "waexec": approximate search whose third result
+ * is the sub-match offset table. */
+static int Ltre_aexec (lua_State *L) {
+  const int want_substrings = 0;
+
+  return generic_atfind (L, want_substrings);
+}
+
+/* Run one wide-character search for the gmatch iterator.
+ * Adds REG_NOTBOL to the execution flags when the search does not begin at
+ * offset 0, advances argE->text by the start offset (the caller sees the
+ * modified pointer), and searches the remaining characters. Returns the TRE
+ * result code. */
+static int gmatch_exec (TUserdata *ud, TArgExec *argE) {
+  size_t nchars;
+
+  if (argE->startoffset > 0)
+    argE->eflags |= REG_NOTBOL;
+
+  argE->text += argE->startoffset;
+  nchars = (argE->textlen - argE->startoffset) / ALG_CHARSIZE;
+
+  return tre_regwnexec (&ud->r, (const wchar_t *) argE->text, nchars,
+                        ALG_NSUB(ud) + 1, ud->match, argE->eflags);
+}
+
+/* Push the whole subject (argE->textlen bytes starting at argE->text) onto
+ * the Lua stack as a string. */
+static void gmatch_pushsubject (lua_State *L, TArgExec *argE) {
+  const char *subject = argE->text;
+  const size_t nbytes = argE->textlen;
+
+  lua_pushlstring (L, subject, nbytes);
+}
+
+/* Run a single wide-character search for find/match.
+ * Advances argE->text by the start offset (the caller sees the modified
+ * pointer) and searches the remaining characters with the caller's flags
+ * unchanged. Returns the TRE result code. */
+static int findmatch_exec (TPosix *ud, TArgExec *argE) {
+  size_t nchars;
+
+  argE->text += argE->startoffset;
+  nchars = (argE->textlen - argE->startoffset) / ALG_CHARSIZE;
+
+  return tre_regwnexec (&ud->r, (const wchar_t *) argE->text, nchars,
+                        ALG_NSUB(ud) + 1, ud->match, argE->eflags);
+}
+
+/* Run one wide-character search for gsub, starting `st` bytes into the
+ * subject. REG_NOTBOL is added to the execution flags when st > 0.
+ * argE->text itself is left untouched. Returns the TRE result code. */
+static int gsub_exec (TPosix *ud, TArgExec *argE, int st) {
+  const wchar_t *subject = (const wchar_t *) (argE->text + st);
+  const size_t nchars = (argE->textlen - st) / ALG_CHARSIZE;
+
+  if (st > 0)
+    argE->eflags |= REG_NOTBOL;
+
+  return tre_regwnexec (&ud->r, subject, nchars, ALG_NSUB(ud) + 1,
+                        ud->match, argE->eflags);
+}
+
+/* Run one wide-character search for split, starting `offset` bytes into the
+ * subject. REG_NOTBOL is added to the execution flags when offset > 0.
+ * argE->text itself is left untouched. Returns the TRE result code. */
+static int split_exec (TPosix *ud, TArgExec *argE, int offset) {
+  const wchar_t *subject = (const wchar_t *) (argE->text + offset);
+  const size_t nchars = (argE->textlen - offset) / ALG_CHARSIZE;
+
+  if (offset > 0)
+    argE->eflags |= REG_NOTBOL;
+
+  return tre_regwnexec (&ud->r, subject, nchars, ALG_NSUB(ud) + 1,
+                        ud->match, argE->eflags);
+}
+
+/* Wide-character methods, registered by add_wide_lib when its `methods`
+ * argument is non-zero. The ud_* handlers are not defined in this file;
+ * presumably they come from ../algo.h -- TODO confirm. The table ends with a
+ * {NULL, NULL} sentinel. */
+static const luaL_Reg posixmeta[] = {
+ { "wexec", ud_exec },
+ { "wtfind", ud_tfind },
+ { "wfind", ud_find },
+ { "wmatch", ud_match },
+ { "waexec", Ltre_aexec },
+ { "watfind", Ltre_atfind },
+ { NULL, NULL}
+};
+
+/* Wide-character library functions, registered by add_wide_lib when its
+ * `methods` argument is zero. The handlers are not defined in this file;
+ * presumably they come from ../algo.h -- TODO confirm. The table ends with a
+ * {NULL, NULL} sentinel. */
+static const luaL_Reg rexlib[] = {
+ { "wmatch", match },
+ { "wfind", find },
+ { "wgmatch", gmatch },
+ { "wgsub", gsub },
+ { "wsplit", split },
+ { "wnew", ud_new },
+ { NULL, NULL }
+};
+
+/* Register the wide-character functions into the table on top of the Lua
+ * stack: the method table (posixmeta) when `methods` is non-zero, otherwise
+ * the library function table (rexlib). */
+void add_wide_lib (lua_State *L, int methods)
+{
+  const luaL_Reg *funcs = rexlib;
+
+  if (methods)
+    funcs = posixmeta;
+  luaL_register (L, NULL, funcs);
+}