summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Shmuel Zeigerman <solomuz0@gmail.com> 2012-02-12 23:31:32 +0200
committer Shmuel Zeigerman <solomuz0@gmail.com> 2012-02-12 23:31:32 +0200
commit 9a4df7ffa58f90a802b9ce5704cb2452cb54fc9e (patch)
tree 694283108715fc81066601b20d9b9e1705c9ada5
parent f69b66572dc66fab1c52df25440a97cfc2ecc9e4 (diff)
download lrexlib-9a4df7ffa58f90a802b9ce5704cb2452cb54fc9e.tar.gz
TRE binding: add wide-character functions.
-rw-r--r-- src/algo.h 18
-rw-r--r-- src/common.c 39
-rw-r--r-- src/common.h 1
-rw-r--r-- src/tre/ltre_w.c 1
-rw-r--r-- test/tre_sets.lua 138
5 files changed, 121 insertions, 76 deletions
diff --git a/src/algo.h b/src/algo.h
index 65db6c5..d5ad9d7 100644
--- a/src/algo.h
+++ b/src/algo.h
@@ -17,6 +17,10 @@ static int generate_error (lua_State *L, const TUserdata *ud, int errcode);
# define ALG_CHARSIZE 1
#endif
+#ifndef BUFFERZ_PUTREPSTRING
+# define BUFFERZ_PUTREPSTRING bufferZ_putrepstring
+#endif
+
#ifndef ALG_GETCARGS
# define ALG_GETCARGS(a,b,c)
#endif
@@ -213,7 +217,7 @@ static int gsub (lua_State *L) {
/*------------------------------------------------------------------*/
if (argE.reptype == LUA_TSTRING) {
buffer_init (&BufRep, 256, L, &freelist);
- bufferZ_putrepstring (&BufRep, argE.funcpos, ALG_NSUB(ud));
+ BUFFERZ_PUTREPSTRING (&BufRep, argE.funcpos, ALG_NSUB(ud));
}
/*------------------------------------------------------------------*/
if (argE.maxmatch == GSUB_CONDITIONAL) {
@@ -231,8 +235,8 @@ static int gsub (lua_State *L) {
#ifdef ALG_USERETRY
if (retry) {
if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */
- buffer_addlstring (&BufOut, argE.text + st, 1);
- ++st;
+ buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE);
+ st += ALG_CHARSIZE;
retry = 0;
continue;
}
@@ -309,8 +313,8 @@ static int gsub (lua_State *L) {
if (argE.maxmatch == GSUB_CONDITIONAL) {
/* Call the function */
lua_pushvalue (L, argE.funcpos2);
- lua_pushinteger (L, from + 1);
- lua_pushinteger (L, to);
+ lua_pushinteger (L, from/ALG_CHARSIZE + 1);
+ lua_pushinteger (L, to/ALG_CHARSIZE);
if (argE.reptype == LUA_TSTRING)
buffer_pushresult (&BufTemp);
else {
@@ -359,8 +363,8 @@ static int gsub (lua_State *L) {
retry = 1;
#else
/* advance by 1 char (not replaced) */
- buffer_addlstring (&BufOut, argE.text + st, 1);
- ++st;
+ buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE);
+ st += ALG_CHARSIZE;
#endif
}
else break;
diff --git a/src/common.c b/src/common.c
index ae6147c..03a2f1d 100644
--- a/src/common.c
+++ b/src/common.c
@@ -220,6 +220,45 @@ void bufferZ_putrepstring (TBuffer *BufRep, int reppos, int nsub) {
}
}
+/* Wide-character variant of bufferZ_putrepstring.
+   Reads the Lua string at stack index `reppos`, treats its bytes as an
+   array of wchar_t, and stores its parsed form in BufRep:
+     - runs of ordinary characters are appended with bufferZ_addlstring;
+     - "%" followed by an ASCII digit is appended as a capture number
+       with bufferZ_addnum;
+     - "%" followed by any other character appends that character
+       literally;
+     - a lone "%" at the very end of the string is dropped.
+   `nsub` is the number of subpatterns: "%1" is treated as "%0" when
+   nsub is 0, and any index greater than nsub frees BufRep->freelist and
+   raises the Lua error "invalid capture index".
+   Trailing bytes that do not make up a whole wchar_t are ignored.
+   1. When called repeatedly on the same TBuffer, its existing data
+      is discarded and overwritten by the new data.
+   2. The TBuffer's array is never shrunk by this function.
+   NOTE(review): lengths here are in units of sizeof(wchar_t), while
+   ltre_w.c defines ALG_CHARSIZE as 2 -- confirm the two agree on every
+   target platform.
+*/
+void bufferZ_putrepstringW (TBuffer *BufRep, int reppos, int nsub) {
+  size_t replen;
+  const wchar_t *p = (const wchar_t*) lua_tolstring (BufRep->L, reppos, &replen);
+  /* All declarations precede the first statement so that C89 compilers
+     accept this function. */
+  const wchar_t *end = p + replen / sizeof(wchar_t);
+  BufRep->top = 0;
+  while (p < end) {
+    const wchar_t *q;
+    for (q = p; q < end && *q != L'%'; ++q)
+      {}
+    if (q != p)
+      bufferZ_addlstring (BufRep, p, (q - p) * sizeof(wchar_t));
+    if (q < end) {
+      if (++q < end) {  /* skip % */
+        /* Only ASCII digits name a capture; the explicit range check
+           keeps the conversion below exact for every accepted char. */
+        if (*q >= L'0' && *q <= L'9') {
+          int num = (int) (*q - L'0');
+          if (num == 1 && nsub == 0)
+            num = 0;
+          else if (num > nsub) {
+            freelist_free (BufRep->freelist);
+            luaL_error (BufRep->L, "invalid capture index");
+          }
+          bufferZ_addnum (BufRep, num);
+        }
+        else bufferZ_addlstring (BufRep, q, 1 * sizeof(wchar_t));
+      }
+      p = q + 1;
+    }
+    else break;
+  }
+}
+
/******************************************************************************
The intended use of this function is as follows:
size_t iter = 0;
diff --git a/src/common.h b/src/common.h
index de8d52d..04ef7cb 100644
--- a/src/common.h
+++ b/src/common.h
@@ -83,6 +83,7 @@ void buffer_addvalue (TBuffer *buf, int stackpos);
void buffer_pushresult (TBuffer *buf);
void bufferZ_putrepstring (TBuffer *buf, int reppos, int nsub);
+void bufferZ_putrepstringW (TBuffer *buf, int reppos, int nsub);
int bufferZ_next (TBuffer *buf, size_t *iter, size_t *len, const char **str);
int get_int_field (lua_State *L, const char* field);
diff --git a/src/tre/ltre_w.c b/src/tre/ltre_w.c
index bd3ca1e..7f8f87b 100644
--- a/src/tre/ltre_w.c
+++ b/src/tre/ltre_w.c
@@ -25,6 +25,7 @@
#define ALG_CFLAGS_DFLT REG_EXTENDED
#define ALG_EFLAGS_DFLT 0
#define ALG_CHARSIZE 2
+#define BUFFERZ_PUTREPSTRING bufferZ_putrepstringW
#define ALG_NOMATCH(res) ((res) == REG_NOMATCH)
#define ALG_ISMATCH(res) ((res) == 0)
diff --git a/test/tre_sets.lua b/test/tre_sets.lua
index 2b25b48..b2169a1 100644
--- a/test/tre_sets.lua
+++ b/test/tre_sets.lua
@@ -185,7 +185,7 @@ local function set_f_wgsub1 (lib, flg)
return {
Name = "Function wgsub, set1",
Func = get_wgsub (lib),
- --{ s, p, f, n, res1, res2, res3 },
+ --{ s, p, f, n, res1, res2, res3 },
{ {subj, cpat, L"", 0}, {subj, 0, 0} }, -- test "n" + empty_replace
{ {subj, pat, L"", 0}, {subj, 0, 0} }, -- test "n" + empty_replace
{ {subj, pat, L"", -1}, {subj, 0, 0} }, -- test "n" + empty_replace
@@ -207,7 +207,7 @@ local function set_f_wgsub2 (lib, flg)
return {
Name = "Function wgsub, set2",
Func = get_wgsub (lib),
- --{ s, p, f, n, res1, res2, res3 },
+ --{ s, p, f, n, res1, res2, res3 },
{ {subj, pat, L"<%1>" }, {L"<a>b<c>", 2, 2} }, -- test non-escaped chars in f
{ {subj, pat, L"%<%1%>" }, {L"<a>b<c>", 2, 2} }, -- test escaped chars in f
{ {subj, pat, L"" }, {L"b", 2, 2} }, -- test empty replace
@@ -224,12 +224,12 @@ local function set_f_wgsub3 (lib, flg)
return {
Name = "Function wgsub, set3",
Func = get_wgsub (lib),
- --{ s, p, f, n, res1,res2,res3 },
- { {L"abc", L"a", "%0" }, {"abc", 1, 1} }, -- test (in)valid capture index
- { {L"abc", L"a", "%1" }, {"abc", 1, 1} },
- { {L"abc", L"[ac]", "%1" }, {"abc", 2, 2} },
- { {L"abc", L"(a)", "%1" }, {"abc", 1, 1} },
- { {L"abc", L"(a)", "%2" }, "invalid capture index" },
+ --{ s, p, f, n, res1,res2,res3 },
+ { {L"abc", L"a", L"%0" }, {L"abc", 1, 1} }, -- test (in)valid capture index
+ { {L"abc", L"a", L"%1" }, {L"abc", 1, 1} },
+ { {L"abc", L"[ac]", L"%1" }, {L"abc", 2, 2} },
+ { {L"abc", L"(a)", L"%1" }, {L"abc", 1, 1} },
+ { {L"abc", L"(a)", L"%2" }, "invalid capture index" },
}
end
@@ -237,87 +237,87 @@ local function set_f_wgsub4 (lib, flg)
return {
Name = "Function wgsub, set4",
Func = get_wgsub (lib),
- --{ s, p, f, n, res1, res2, res3 },
- { {"a2c3", ".", "#" }, {"####", 4, 4} }, -- test .
- { {"a2c3", ".+", "#" }, {"#", 1, 1} }, -- test .+
- { {"a2c3", ".*", "#" }, {"##", 2, 2} }, -- test .*
- { {"/* */ */", "\\/\\*(.*)\\*\\/", "#" }, {"#", 1, 1} },
- { {"a2c3", "[0-9]", "#" }, {"a#c#", 2, 2} }, -- test %d
- { {"a2c3", "[^0-9]", "#" }, {"#2#3", 2, 2} }, -- test %D
- { {"a \t\nb", "[ \t\n]", "#" }, {"a###b", 3, 3} }, -- test %s
- { {"a \t\nb", "[^ \t\n]", "#" }, {"# \t\n#", 2, 2} }, -- test %S
+ --{ s, p, f, n, res1, res2, res3 },
+ { {L"a2c3", L".", L"#" }, {L"####", 4, 4} }, -- test .
+ { {L"a2c3", L".+", L"#" }, {L"#", 1, 1} }, -- test .+
+ { {L"a2c3", L".*", L"#" }, {L"##", 2, 2} }, -- test .*
+ { {L"/* */ */", L"\\/\\*(.*)\\*\\/", L"#" }, {L"#", 1, 1} },
+ { {L"a2c3", L"[0-9]", L"#" }, {L"a#c#", 2, 2} }, -- test %d
+ { {L"a2c3", L"[^0-9]", L"#" }, {L"#2#3", 2, 2} }, -- test %D
+ { {L"a \t\nb", L"[ \t\n]", L"#" }, {L"a###b", 3, 3} }, -- test %s
+ { {L"a \t\nb", L"[^ \t\n]", L"#" }, {L"# \t\n#", 2, 2} }, -- test %S
}
end
local function set_f_wgsub5 (lib, flg)
local function frep1 () end -- returns nothing
- local function frep2 () return "#" end -- ignores arguments
- local function frep3 (...) return table.concat({...}, ",") end -- "normal"
+ local function frep2 () return L"#" end -- ignores arguments
+ local function frep3 (...) return table.concat({...}, L",") end -- "normal"
local function frep4 () return {} end -- invalid return type
- local function frep5 () return "7", "a" end -- 2-nd return is "a"
- local function frep6 () return "7", "break" end -- 2-nd return is "break"
- local subj = "a2c3"
+ local function frep5 () return L"7", L"a" end -- 2-nd return is "a"
+ local function frep6 () return L"7", "break" end -- 2-nd return is "break"
+ local subj = L"a2c3"
return {
Name = "Function wgsub, set5",
Func = get_wgsub (lib),
- --{ s, p, f, n, res1, res2, res3 },
- { {subj, "a(.)c(.)", frep1 }, {subj, 1, 0} },
- { {subj, "a(.)c(.)", frep2 }, {"#", 1, 1} },
- { {subj, "a(.)c(.)", frep3 }, {"2,3", 1, 1} },
- { {subj, "a.c.", frep3 }, {subj, 1, 1} },
- { {subj, "z*", frep1 }, {subj, 5, 0} },
- { {subj, "z*", frep2 }, {"#a#2#c#3#", 5, 5} },
- { {subj, "z*", frep3 }, {subj, 5, 5} },
- { {subj, subj, frep4 }, "invalid return type" },
- { {"abc",".", frep5 }, {"777", 3, 3} },
- { {"abc",".", frep6 }, {"777", 3, 3} },
+ --{ s, p, f, n, res1, res2, res3 },
+ { {subj, L"a(.)c(.)", frep1 }, {subj, 1, 0} },
+ { {subj, L"a(.)c(.)", frep2 }, {L"#", 1, 1} },
+ { {subj, L"a(.)c(.)", frep3 }, {L"2,3", 1, 1} },
+ { {subj, L"a.c.", frep3 }, {subj, 1, 1} },
+ { {subj, L"z*", frep1 }, {subj, 5, 0} },
+ { {subj, L"z*", frep2 }, {L"#a#2#c#3#", 5, 5} },
+ { {subj, L"z*", frep3 }, {subj, 5, 5} },
+ { {subj, subj, frep4 }, "invalid return type" },
+ { {L"abc",L".", frep5 }, {L"777", 3, 3} },
+ { {L"abc",L".", frep6 }, {L"777", 3, 3} },
}
end
local function set_f_wgsub6 (lib, flg)
- local tab1, tab2, tab3 = {}, { ["2"] = 56 }, { ["2"] = {} }
- local subj = "a2c3"
+ local tab1, tab2, tab3 = {}, { [L"2"] = 56 }, { [L"2"] = {} }
+ local subj = L"a2c3"
return {
Name = "Function wgsub, set6",
Func = get_wgsub (lib),
- --{ s, p, f, n, res1,res2,res3 },
- { {subj, "a(.)c(.)", tab1 }, {subj, 1, 0} },
- { {subj, "a(.)c(.)", tab2 }, {"56", 1, 1} },
- { {subj, "a(.)c(.)", tab3 }, "invalid replacement type" },
- { {subj, "a.c.", tab1 }, {subj, 1, 0} },
- { {subj, "a.c.", tab2 }, {subj, 1, 0} },
- { {subj, "a.c.", tab3 }, {subj, 1, 0} },
+ --{ s, p, f, n, res1,res2,res3 },
+ { {subj, L"a(.)c(.)", tab1 }, {subj, 1, 0} },
+ { {subj, L"a(.)c(.)", tab2 }, {"56", 1, 1} },
+ { {subj, L"a(.)c(.)", tab3 }, "invalid replacement type" },
+ { {subj, L"a.c.", tab1 }, {subj, 1, 0} },
+ { {subj, L"a.c.", tab2 }, {subj, 1, 0} },
+ { {subj, L"a.c.", tab3 }, {subj, 1, 0} },
}
end
local function set_f_wgsub8 (lib, flg)
- local subj, patt, repl = "abcdef", "..", "*"
+ local subj, patt, repl = L"abcdef", L"..", L"*"
return {
Name = "Function wgsub, set8",
Func = get_wgsub (lib),
--{ s, p, f, n, res1, res2, res3 },
- { {subj, patt, repl, function() end }, {"abcdef", 3, 0} },
- { {subj, patt, repl, function() return nil end }, {"abcdef", 3, 0} },
- { {subj, patt, repl, function() return false end }, {"abcdef", 3, 0} },
- { {subj, patt, repl, function() return true end }, {"***", 3, 3} },
- { {subj, patt, repl, function() return {} end }, {"***", 3, 3} },
- { {subj, patt, repl, function() return "#" end }, {"###", 3, 3} },
- { {subj, patt, repl, function() return 57 end }, {"575757", 3, 3} },
- { {subj, patt, repl, function (from) return from end }, {"135", 3, 3} },
- { {subj, patt, repl, function (from, to) return to end }, {"246", 3, 3} },
+ { {subj, patt, repl, function() end }, {L"abcdef", 3, 0} },
+ { {subj, patt, repl, function() return nil end }, {L"abcdef", 3, 0} },
+ { {subj, patt, repl, function() return false end }, {L"abcdef", 3, 0} },
+ { {subj, patt, repl, function() return true end }, {L"***", 3, 3} },
+ { {subj, patt, repl, function() return {} end }, {L"***", 3, 3} },
+ { {subj, patt, repl, function() return L"#" end }, {L"###", 3, 3} },
+ { {subj, patt, repl, function() return 57 end }, {"575757", 3, 3} },
+ { {subj, patt, repl, function (from) return from end }, {"135", 3, 3} },
+ { {subj, patt, repl, function (from, to) return to end }, {"246", 3, 3} },
{ {subj, patt, repl, function (from,to,rep) return rep end },
- {"***", 3, 3} },
+ {L"***", 3, 3} },
{ {subj, patt, repl, function (from, to, rep) return rep..to..from end },
- {"*21*43*65", 3, 3} },
- { {subj, patt, repl, function() return nil end }, {"abcdef", 3, 0} },
- { {subj, patt, repl, function() return nil, nil end }, {"abcdef", 3, 0} },
- { {subj, patt, repl, function() return nil, false end }, {"abcdef", 3, 0} },
- { {subj, patt, repl, function() return nil, true end }, {"ab**", 3, 2} },
- { {subj, patt, repl, function() return true, true end }, {"***", 3, 3} },
- { {subj, patt, repl, function() return nil, 0 end }, {"abcdef", 1, 0} },
- { {subj, patt, repl, function() return true, 0 end }, {"*cdef", 1, 1} },
- { {subj, patt, repl, function() return nil, 1 end }, {"ab*ef", 2, 1} },
- { {subj, patt, repl, function() return true, 1 end }, {"**ef", 2, 2} },
+ {L"*".."21"..L"*".."43"..L"*".."65", 3, 3} },
+ { {subj, patt, repl, function() return nil end }, {L"abcdef", 3, 0} },
+ { {subj, patt, repl, function() return nil, nil end }, {L"abcdef", 3, 0} },
+ { {subj, patt, repl, function() return nil, false end }, {L"abcdef", 3, 0} },
+ { {subj, patt, repl, function() return nil, true end }, {L"ab**", 3, 2} },
+ { {subj, patt, repl, function() return true, true end }, {L"***", 3, 3} },
+ { {subj, patt, repl, function() return nil, 0 end }, {L"abcdef", 1, 0} },
+ { {subj, patt, repl, function() return true, 0 end }, {L"*cdef", 1, 1} },
+ { {subj, patt, repl, function() return nil, 1 end }, {L"ab*ef", 2, 1} },
+ { {subj, patt, repl, function() return true, 1 end }, {L"**ef", 2, 2} },
}
end
@@ -334,12 +334,12 @@ return function (libname)
set_m_wfind (lib),
set_m_wmatch (lib),
set_f_wgsub1 (lib),
- --set_f_wgsub2 (lib),
- --set_f_wgsub3 (lib),
- --set_f_wgsub4 (lib),
- --set_f_wgsub5 (lib),
- --set_f_wgsub6 (lib),
- --set_f_wgsub8 (lib),
+ set_f_wgsub2 (lib),
+ set_f_wgsub3 (lib),
+ set_f_wgsub4 (lib),
+ set_f_wgsub5 (lib),
+ set_f_wgsub6 (lib),
+ set_f_wgsub8 (lib),
--set_f_plainfind (lib),
}
end