From e98c08314819bc5f609889087b329d0b78180d41 Mon Sep 17 00:00:00 2001 From: Shmuel Zeigerman Date: Mon, 13 Feb 2012 23:38:07 +0200 Subject: TRE binding: add wide-character functions. --- src/common.c | 46 ++----------------------------------------- src/common.h | 3 ++- src/tre/Makefile | 1 - src/tre/ltre_w.c | 45 ++++++++++++++++++++++++++++++++++++++++-- test/tre_sets.lua | 31 +++++++++++++++++++++++++++++ windows/mingw/_mingw.mak | 4 ++-- windows/mingw/rex_gnu.mak | 2 +- windows/mingw/rex_onig.mak | 2 +- windows/mingw/rex_pcre.mak | 2 +- windows/mingw/rex_spencer.mak | 2 +- windows/mingw/rex_tre.mak | 4 ++-- 11 files changed, 86 insertions(+), 56 deletions(-) diff --git a/src/common.c b/src/common.c index 09d595c..0b444c0 100644 --- a/src/common.c +++ b/src/common.c @@ -166,7 +166,7 @@ void buffer_addvalue (TBuffer *buf, int stackpos) { buffer_addlstring (buf, p, len); } -static void bufferZ_addlstring (TBuffer *buf, const void *src, size_t len) { +void bufferZ_addlstring (TBuffer *buf, const void *src, size_t len) { int n; size_t header[2] = { ID_STRING }; header[1] = len; @@ -176,7 +176,7 @@ static void bufferZ_addlstring (TBuffer *buf, const void *src, size_t len) { if (n) buffer_addlstring (buf, NULL, N_ALIGN - n); } -static void bufferZ_addnum (TBuffer *buf, size_t num) { +void bufferZ_addnum (TBuffer *buf, size_t num) { size_t header[2] = { ID_NUMBER }; header[1] = num; buffer_addlstring (buf, header, sizeof (header)); @@ -220,47 +220,6 @@ void bufferZ_putrepstring (TBuffer *BufRep, int reppos, int nsub) { } } -#ifdef WIDE_CHAR_GSUB -/* 1. When called repeatedly on the same TBuffer, its existing data - is discarded and overwritten by the new data. - 2. The TBuffer's array is never shrunk by this function. -*/ -void bufferZ_putrepstringW (TBuffer *BufRep, int reppos, int nsub) { - wchar_t dbuf[] = { 0, 0 }; - size_t replen; - const wchar_t *p = (const wchar_t*) lua_tolstring (BufRep->L, reppos, &replen); - replen /= sizeof(wchar_t); - const wchar_t *end = p + replen; - BufRep->top = 0; - while (p < end) { - const wchar_t *q; - for (q = p; q < end && *q != L'%'; ++q) - {} - if (q != p) - bufferZ_addlstring (BufRep, p, (q - p) * sizeof(wchar_t)); - if (q < end) { - if (++q < end) { /* skip % */ - if (iswdigit (*q)) { - int num; - *dbuf = *q; - num = wcstol (dbuf, NULL, 10); - if (num == 1 && nsub == 0) - num = 0; - else if (num > nsub) { - freelist_free (BufRep->freelist); - luaL_error (BufRep->L, "invalid capture index"); - } - bufferZ_addnum (BufRep, num); - } - else bufferZ_addlstring (BufRep, q, 1 * sizeof(wchar_t)); - } - p = q + 1; - } - else break; - } -} -#endif - /****************************************************************************** The intended use of this function is as follows: size_t iter = 0; @@ -287,4 +246,3 @@ int bufferZ_next (TBuffer *buf, size_t *iter, size_t *num, const char **str) { } return 0; } - diff --git a/src/common.h b/src/common.h index 04ef7cb..95aa79d 100644 --- a/src/common.h +++ b/src/common.h @@ -83,8 +83,9 @@ void buffer_addvalue (TBuffer *buf, int stackpos); void buffer_pushresult (TBuffer *buf); void bufferZ_putrepstring (TBuffer *buf, int reppos, int nsub); -void bufferZ_putrepstringW (TBuffer *buf, int reppos, int nsub); int bufferZ_next (TBuffer *buf, size_t *iter, size_t *len, const char **str); +void bufferZ_addlstring (TBuffer *buf, const void *src, size_t len); +void bufferZ_addnum (TBuffer *buf, size_t num); int get_int_field (lua_State *L, const char* field); void set_int_field (lua_State *L, const char* field, int val); diff --git a/src/tre/Makefile b/src/tre/Makefile index 2350108..2d3964e 100644 --- a/src/tre/Makefile +++ b/src/tre/Makefile @@ -26,7 +26,6 @@ REGNAME = tre # === END OF USER SETTINGS === OBJ = ltre.o ltre_w.o ../common.o -MYCFLAGS = -DWIDE_CHAR_GSUB include ../common.mak diff --git a/src/tre/ltre_w.c b/src/tre/ltre_w.c index 7f8f87b..d3bceb7 100644 --- a/src/tre/ltre_w.c +++ b/src/tre/ltre_w.c @@ -10,6 +10,8 @@ #include +void bufferZ_putrepstringW (TBuffer *BufRep, int reppos, int nsub); + /* These 2 settings may be redefined from the command-line or the makefile. * They should be kept in sync between themselves and with the target name. */ @@ -132,8 +134,8 @@ static int generic_atfind (lua_State *L, int tfind) { res_match.pmatch = ud->match; /* execute the search */ - res = tre_reganexec (&ud->r, argE.text, argE.textlen - argE.startoffset, - &res_match, argP, argE.eflags); + res = tre_regawnexec (&ud->r, (const wchar_t*)argE.text, + (argE.textlen - argE.startoffset)/ALG_CHARSIZE, &res_match, argP, argE.eflags); if (ALG_ISMATCH (res)) { ALG_PUSHOFFSETS (L, ud, argE.startoffset, 0); if (tfind) @@ -218,3 +220,42 @@ void add_wide_lib (lua_State *L, int methods) { luaL_register(L, NULL, methods ? posixmeta : rexlib); } + +/* 1. When called repeatedly on the same TBuffer, its existing data + is discarded and overwritten by the new data. + 2. The TBuffer's array is never shrunk by this function. +*/ +void bufferZ_putrepstringW (TBuffer *BufRep, int reppos, int nsub) { + wchar_t dbuf[] = { 0, 0 }; + size_t replen; + const wchar_t *p = (const wchar_t*) lua_tolstring (BufRep->L, reppos, &replen); + replen /= sizeof(wchar_t); + const wchar_t *end = p + replen; + BufRep->top = 0; + while (p < end) { + const wchar_t *q; + for (q = p; q < end && *q != L'%'; ++q) + {} + if (q != p) + bufferZ_addlstring (BufRep, p, (q - p) * sizeof(wchar_t)); + if (q < end) { + if (++q < end) { /* skip % */ + if (iswdigit (*q)) { + int num; + *dbuf = *q; + num = wcstol (dbuf, NULL, 10); + if (num == 1 && nsub == 0) + num = 0; + else if (num > nsub) { + freelist_free (BufRep->freelist); + luaL_error (BufRep->L, "invalid capture index"); + } + bufferZ_addnum (BufRep, num); + } + else bufferZ_addlstring (BufRep, q, 1 * sizeof(wchar_t)); + } + p = q + 1; + } + else break; + } +} diff --git a/test/tre_sets.lua b/test/tre_sets.lua index 89f00b6..e755e03 100644 --- a/test/tre_sets.lua +++ b/test/tre_sets.lua @@ -115,6 +115,21 @@ local function set_m_wexec (lib, flg) } end +local function set_m_waexec (lib, flg) + return { + Name = "Method waexec", + Method = "waexec", + --{patt}, {subj, st} { results } + { {L".+"}, {L"abcd",{}}, {1,4,{}} }, -- [none] + { {L".+"}, {L"abcd",{},2}, {2,4,{}} }, -- positive st + { {L".+"}, {L"abcd",{},-2}, {3,4,{}} }, -- negative st + { {L".*"}, {L"abcd",{}}, {1,4,{}} }, -- [none] + { {L"bc"}, {L"abc", {}}, {2,3,{}} }, -- [none] + { {L "(.)b.(d)"}, {L"abcd",{}}, {1,4,{1,1,4,4}}},--[captures] + { {L"(a+)6+(b+)"}, {L"Taa66bbT",{},2}, {2,7,{2,3,6,7}}},--[st+captures] + } +end + local function set_m_wtfind (lib, flg) return { Name = "Method wtfind", @@ -129,6 +144,20 @@ local function set_m_wtfind (lib, flg) } end +local function set_m_watfind (lib, flg) + return { + Name = "Method watfind", + Method = "watfind", + --{patt}, {subj, st} { results } + { {L".+"}, {L"abcd",{}}, {1,4,{}} }, -- [none] + { {L".+"}, {L"abcd",{},2}, {2,4,{}} }, -- positive st + { {L".+"}, {L"abcd",{},-2}, {3,4,{}} }, -- negative st + { {L".*"}, {L"abcd",{}}, {1,4,{}} }, -- [none] + { {L"bc"}, {L"abc", {}}, {2,3,{}} }, -- [none] + { {L"(.)b.(d)"}, {L"abcd",{}}, {1,4,{L"a",L"d"}}},--[captures] + } +end + local function set_m_wfind (lib, flg) return { Name = "Method wfind", @@ -308,7 +337,9 @@ return function (libname) set_f_wfind (lib), set_f_wmatch (lib), set_m_wexec (lib), + set_m_waexec (lib), set_m_wtfind (lib), + set_m_watfind (lib), set_m_wfind (lib), set_m_wmatch (lib), set_f_wgsub1 (lib), diff --git a/windows/mingw/_mingw.mak b/windows/mingw/_mingw.mak index c7d9ba3..169c962 100644 --- a/windows/mingw/_mingw.mak +++ b/windows/mingw/_mingw.mak @@ -42,8 +42,8 @@ $(BIN): $(OBJ) $(DEFFILE) $(CC) $(DEFFILE) $(OBJ) $(LIBS) -o $@ -shared $(DEFFILE): - lua -e"print('EXPORTS') for k,v in ipairs{$(EXPORTED)} do \ - print('\t'..v) end" > $@ + echo EXPORTS > $@ + for %%d in ($(EXPORTED)) do echo %%d>> $@ $(BININSTALL): $(BIN) copy /Y $< $@ diff --git a/windows/mingw/rex_gnu.mak b/windows/mingw/rex_gnu.mak index 0675658..965095e 100644 --- a/windows/mingw/rex_gnu.mak +++ b/windows/mingw/rex_gnu.mak @@ -10,7 +10,7 @@ MYINCS = -I$(REGEXINC) -I$(LUAINC) MYLIBS = -L$(DLLPATH) -lregex2 -l$(LUADLL) OBJ = lgnu.o common.o MYCFLAGS = -W -Wall -O2 -EXPORTED = 'luaopen_$(PROJECT)' +EXPORTED = luaopen_$(PROJECT) SRCPATH = ..\..\src;..\..\src\gnu TESTPATH = ..\..\test TESTNAME = gnu diff --git a/windows/mingw/rex_onig.mak b/windows/mingw/rex_onig.mak index b69d3a9..a179362 100644 --- a/windows/mingw/rex_onig.mak +++ b/windows/mingw/rex_onig.mak @@ -10,7 +10,7 @@ MYINCS = -I$(REGEXINC) -I$(LUAINC) MYLIBS = -L$(DLLPATH) -lonig -l$(LUADLL) -Wl,--enable-auto-import OBJ = lonig.o lonig_f.o common.o MYCFLAGS = -W -Wall -O2 -EXPORTED = 'luaopen_$(PROJECT)' +EXPORTED = luaopen_$(PROJECT) SRCPATH = ..\..\src;..\..\src\oniguruma TESTPATH = ..\..\test TESTNAME = onig diff --git a/windows/mingw/rex_pcre.mak b/windows/mingw/rex_pcre.mak index d134b05..ef68352 100644 --- a/windows/mingw/rex_pcre.mak +++ b/windows/mingw/rex_pcre.mak @@ -10,7 +10,7 @@ MYINCS = -I$(REGEXINC) -I$(LUAINC) MYLIBS = -L$(DLLPATH) -lpcre -l$(LUADLL) OBJ = lpcre.o lpcre_f.o common.o MYCFLAGS = -W -Wall -O2 -EXPORTED = 'luaopen_$(PROJECT)' +EXPORTED = luaopen_$(PROJECT) SRCPATH = ..\..\src;..\..\src\pcre TESTPATH = ..\..\test TESTNAME = pcre diff --git a/windows/mingw/rex_spencer.mak b/windows/mingw/rex_spencer.mak index 23ae67c..bc00d8c 100644 --- a/windows/mingw/rex_spencer.mak +++ b/windows/mingw/rex_spencer.mak @@ -10,7 +10,7 @@ MYINCS = -I$(REGEXINC) -I$(LUAINC) MYLIBS = -L$(DLLPATH) -lrxspencer -l$(LUADLL) OBJ = lposix.o common.o MYCFLAGS = -W -Wall -O2 -EXPORTED = 'luaopen_$(PROJECT)' +EXPORTED = luaopen_$(PROJECT) SRCPATH = ..\..\src;..\..\src\posix TESTPATH = ..\..\test TESTNAME = spencer diff --git a/windows/mingw/rex_tre.mak b/windows/mingw/rex_tre.mak index 14507a6..8c167b3 100644 --- a/windows/mingw/rex_tre.mak +++ b/windows/mingw/rex_tre.mak @@ -9,8 +9,8 @@ PROJECT = rex_tre MYINCS = -I$(REGEXINC) -I$(LUAINC) MYLIBS = -L$(DLLPATH) -ltre -l$(LUADLL) OBJ = ltre.o ltre_w.o common.o -MYCFLAGS = -W -Wall -O2 -DWIDE_CHAR_GSUB -EXPORTED = 'luaopen_$(PROJECT)' +MYCFLAGS = -W -Wall -O2 +EXPORTED = luaopen_$(PROJECT) SRCPATH = ..\..\src;..\..\src\tre TESTPATH = ..\..\test TESTNAME = tre -- cgit v1.2.1