summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- doc/manual.txt 39
-rw-r--r-- src/gnu/lgnu.c 150
-rw-r--r-- test/emacs_sets.lua 9
3 files changed, 83 insertions, 115 deletions
diff --git a/doc/manual.txt b/doc/manual.txt
index 0237b44..cda6f26 100644
--- a/doc/manual.txt
+++ b/doc/manual.txt
@@ -399,11 +399,12 @@ constants in the used library. They are formed as follows:
but for ONIG_OPTION_xxx constants, alias strings are created additionally,
e.g., the value of ONIG_OPTION_IGNORECASE constant becomes accessible via
either of two keys: ``"ONIG_OPTION_IGNORECASE"`` and ``"IGNORECASE"``.
-* **GNU**: the GNU library provides the flags ``not_bol``, which stops
- a beginning-of-line anchor from matching at the start of a string,
- ``not_eol``, which stops an end-of-line anchor from matching at the
- end of a string, and ``reverse`` which causes the search to be
- performed backwards.
+* **GNU**: the GNU library provides the flags ``not_bol``, which stops a
+ beginning-of-line anchor from matching at the start of a string; ``not_eol``,
+ which stops an end-of-line anchor from matching at the end of a string; and
+ ``backward``, which causes the search to be performed backwards; as well as the
+ RE_xxx syntax specifiers (as defined in regex.h), with the RE\_ prefix omitted.
+ For example, RE_SYNTAX_GREP becomes ``SYNTAX_GREP`` in Lua.
------------------------------------------------------------
@@ -622,37 +623,17 @@ GNU-only functions and methods
new
---
-:funcdef:`rex.new (patt, [cf], [syn], [tr])`
+:funcdef:`rex.new (patt, [cf], [tr])`
-The *syntax* parameter (*syn*) must be one of the predefined strings that are
-formed from the RE_SYNTAX_xxx identifiers defined in regex.h, by means of
-omitting the RE_SYNTAX\_ part. For example, RE_SYNTAX_GREP becomes ``"GREP"`` on
-the Lua side. The default value, used when the parameter is not supplied or
-``nil``, is either ``"POSIX_EXTENDED"`` (at start-up), or the value set by the
-last setsyntax_ call.
+If the compilation flags (*cf*) are not supplied or are ``nil``, the default
+syntax is ``SYNTAX_POSIX_EXTENDED``. Note that this differs from passing a value
+of zero, which is equivalent to ``SYNTAX_EMACS``.
The *translation* parameter (*tr*) is a map of eight-bit character codes (0 to
255 inclusive) to 8-bit characters (strings). If this parameter is given, the
pattern is translated at compilation time, and each string to be matched is
translated when it is being matched.
-setsyntax
----------
-
-:funcdef:`rex_gnu.setsyntax (syntax)`
-
-This function sets the default syntax for the GNU library (see the ``new``
-method above for the interpretation of the *syntax* parameter). The specified
-syntax will be further used for compiling string regex patterns by all relevant
-functions, unless the *syn* argument is passed to those functions explicitly.
-
-**Returns:** nothing
-
-**Examples:**
-
- 1. ``rex_gnu.setsyntax ("POSIX") -- use POSIX regex syntax as the default``
- 2. ``rex_gnu.setsyntax ("EMACS") -- use Emacs regex syntax as the default``
-
Oniguruma-only functions and methods
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/src/gnu/lgnu.c b/src/gnu/lgnu.c
index 1f9d76b..82e37fb 100644
--- a/src/gnu/lgnu.c
+++ b/src/gnu/lgnu.c
@@ -27,13 +27,13 @@
#define REX_TYPENAME REX_LIBNAME"_regex"
-#define ALG_CFLAGS_DFLT 0
+#define ALG_CFLAGS_DFLT RE_SYNTAX_POSIX_EXTENDED
#define ALG_EFLAGS_DFLT 0
-#define ALG_GETCFLAGS(L,pos) ALG_CFLAGS_DFLT
+#define ALG_GETCFLAGS(L,pos) luaL_optint(L, pos, ALG_CFLAGS_DFLT)
-static void checkarg_compile (lua_State *L, int pos, TArgComp *argC);
-#define ALG_GETCARGS(a,b,c) checkarg_compile(a,b,c)
+static const unsigned char *gettranslate (lua_State *L, int pos);
+#define ALG_GETCARGS(L,pos,argC) argC->translate = gettranslate (L, pos)
#define ALG_NOMATCH(res) ((res) == -1 || (res) == -2)
#define ALG_ISMATCH(res) ((res) >= 0)
@@ -72,9 +72,9 @@ typedef struct {
*/
/* Execution flags, which we need to simulate as GNU does not use flags for this. */
-#define GNU_NOTBOL 1
-#define GNU_NOTEOL 2
-#define GNU_REVERSE 4
+#define GNU_NOTBOL 1
+#define GNU_NOTEOL 2
+#define GNU_BACKWARD 4
static int generate_error (lua_State *L, const TUserdata *ud, int errcode) {
const char *errmsg;
@@ -114,78 +114,20 @@ static const unsigned char *gettranslate (lua_State *L, int pos) {
return translate;
}
-typedef struct {
- const char * name;
- int value;
-} EncPair;
-
-/* ATTENTION:
- This array must always be kept alphabetically sorted, as it's used in the
- binary search, so take care when manually inserting new elements.
- */
-static EncPair Syntaxes[] = {
- { "AWK", RE_SYNTAX_AWK },
- { "ED", RE_SYNTAX_ED },
- { "EGREP", RE_SYNTAX_EGREP },
- { "EMACS", RE_SYNTAX_EMACS },
- { "GNU_AWK", RE_SYNTAX_GNU_AWK },
- { "GREP", RE_SYNTAX_GREP },
- { "POSIX_AWK", RE_SYNTAX_POSIX_AWK },
- { "POSIX_BASIC", RE_SYNTAX_POSIX_BASIC },
- { "POSIX_EGREP", RE_SYNTAX_POSIX_EGREP },
- { "POSIX_EXTENDED", RE_SYNTAX_POSIX_EXTENDED },
- { "POSIX_MINIMAL_BASIC", RE_SYNTAX_POSIX_MINIMAL_BASIC },
- { "POSIX_MINIMAL_EXTENDED", RE_SYNTAX_POSIX_MINIMAL_EXTENDED },
- { "SED", RE_SYNTAX_SED },
-};
-
-static int fcmp (const void *p1, const void *p2) {
- return strcmp (((EncPair*) p1)->name, ((EncPair*) p2)->name);
-}
-
-static int getsyntax (lua_State *L, int pos) {
- EncPair key, *found;
- if ((key.name = luaL_optstring (L, pos, NULL)) == NULL)
- return -1;
- found = (EncPair*) bsearch (&key, Syntaxes, sizeof (Syntaxes) / sizeof (EncPair),
- sizeof (EncPair), fcmp);
- if (found == NULL)
- luaL_argerror (L, pos, "invalid or unsupported syntax string");
- return found->value;
-}
-
-static void checkarg_compile (lua_State *L, int pos, TArgComp *argC) {
- argC->translate = gettranslate (L, pos);
- argC->gnusyn = getsyntax (L, pos + 1);
-}
-
static void seteflags (TGnu *ud, TArgExec *argE) {
ud->r.not_bol = (argE->eflags & GNU_NOTBOL) != 0;
ud->r.not_eol = (argE->eflags & GNU_NOTEOL) != 0;
}
-/*
- rex.setsyntax (syntax)
- @param syntax: one of the predefined strings listed in array 'Syntaxes'
- @return: nothing
-*/
-static int LGnu_setsyntax (lua_State *L) {
- (void) luaL_checkstring (L, 1);
- re_set_syntax (getsyntax (L, 1));
- return 0;
-}
-
static int compile_regex (lua_State *L, const TArgComp *argC, TGnu **pud) {
const char *res;
TGnu *ud;
- reg_syntax_t old_syntax = 0;
int ret;
ud = (TGnu *)lua_newuserdata (L, sizeof (TGnu));
memset (ud, 0, sizeof (TGnu)); /* initialize all members to 0 */
- if (argC->gnusyn >= 0)
- old_syntax = re_set_syntax (argC->gnusyn);
+ re_set_syntax (argC->cflags);
/* translate table is never written to, so this cast is safe */
ud->r.translate = (unsigned char *) argC->translate;
@@ -195,9 +137,6 @@ static int compile_regex (lua_State *L, const TArgComp *argC, TGnu **pud) {
ud->errmsg = res;
ret = generate_error (L, ud, 0);
} else {
- if (argC->cflags & REG_NOSUB)
- ud->r.no_sub = 1;
-
lua_pushvalue (L, LUA_ENVIRONINDEX);
lua_setmetatable (L, -2);
@@ -205,8 +144,6 @@ static int compile_regex (lua_State *L, const TArgComp *argC, TGnu **pud) {
ret = 1;
}
- if (argC->gnusyn >= 0)
- re_set_syntax (old_syntax);
return ret;
}
@@ -216,7 +153,7 @@ static int gmatch_exec (TUserdata *ud, TArgExec *argE) {
ud->r.not_bol = 1;
argE->text += argE->startoffset;
argE->textlen -= argE->startoffset;
- if (argE->eflags & GNU_REVERSE)
+ if (argE->eflags & GNU_BACKWARD)
return re_search (&ud->r, argE->text, argE->textlen, argE->textlen, -argE->textlen, &ud->match);
else
return re_search (&ud->r, argE->text, argE->textlen, 0, argE->textlen, &ud->match);
@@ -230,7 +167,7 @@ static int findmatch_exec (TGnu *ud, TArgExec *argE) {
argE->text += argE->startoffset;
argE->textlen -= argE->startoffset;
seteflags (ud, argE);
- if (argE->eflags & GNU_REVERSE)
+ if (argE->eflags & GNU_BACKWARD)
return re_search (&ud->r, argE->text, argE->textlen, argE->textlen, -argE->textlen, &ud->match);
else
return re_search (&ud->r, argE->text, argE->textlen, 0, argE->textlen, &ud->match);
@@ -240,7 +177,7 @@ static int gsub_exec (TGnu *ud, TArgExec *argE, int st) {
seteflags (ud, argE);
if (st > 0)
ud->r.not_bol = 1;
- if (argE->eflags & GNU_REVERSE)
+ if (argE->eflags & GNU_BACKWARD)
return re_search (&ud->r, argE->text + st, argE->textlen - st, argE->textlen - st, -(argE->textlen - st), &ud->match);
else
return re_search (&ud->r, argE->text + st, argE->textlen - st, 0, argE->textlen - st, &ud->match);
@@ -250,7 +187,7 @@ static int split_exec (TGnu *ud, TArgExec *argE, int offset) {
seteflags (ud, argE);
if (offset > 0)
ud->r.not_bol = 1;
- if (argE->eflags & GNU_REVERSE)
+ if (argE->eflags & GNU_BACKWARD)
return re_search (&ud->r, argE->text + offset, argE->textlen - offset, argE->textlen - offset, -(argE->textlen - offset), &ud->match);
else
return re_search (&ud->r, argE->text + offset, argE->textlen - offset, 0, argE->textlen - offset, &ud->match);
@@ -278,15 +215,67 @@ static int Gnu_tostring (lua_State *L) {
static flag_pair gnu_flags[] =
{
- { "not_bol", GNU_NOTBOL },
- { "not_eol", GNU_NOTEOL },
- { "reverse", GNU_REVERSE },
+ { "not_bol", GNU_NOTBOL },
+ { "not_eol", GNU_NOTEOL },
+ { "backward", GNU_BACKWARD },
+/*---------------------------------------------------------------------------*/
+ { NULL, 0 }
+};
+
+static flag_pair gnu_syntax_flags[] = {
+ /* Syntax flag sets. */
+ { "SYNTAX_EMACS", RE_SYNTAX_EMACS },
+ { "SYNTAX_AWK", RE_SYNTAX_AWK },
+ { "SYNTAX_GNU_AWK", RE_SYNTAX_GNU_AWK },
+ { "SYNTAX_POSIX_AWK", RE_SYNTAX_POSIX_AWK },
+ { "SYNTAX_POSIX_AWK", RE_SYNTAX_POSIX_AWK },
+ { "SYNTAX_EGREP", RE_SYNTAX_EGREP },
+ { "SYNTAX_POSIX_EGREP", RE_SYNTAX_POSIX_EGREP },
+ { "SYNTAX_ED", RE_SYNTAX_ED },
+ { "SYNTAX_SED", RE_SYNTAX_SED },
+ { "SYNTAX_POSIX_AWK", RE_SYNTAX_POSIX_AWK },
+ { "SYNTAX_GREP", RE_SYNTAX_GREP },
+ { "SYNTAX_POSIX_BASIC", RE_SYNTAX_POSIX_BASIC },
+ { "SYNTAX_POSIX_MINIMAL_BASIC", RE_SYNTAX_POSIX_MINIMAL_BASIC },
+ { "SYNTAX_POSIX_EXTENDED", RE_SYNTAX_POSIX_EXTENDED },
+ { "SYNTAX_POSIX_MINIMAL_EXTENDED", RE_SYNTAX_POSIX_MINIMAL_EXTENDED },
+
+ /* Individual syntax flags. */
+ { "BACKSLASH_ESCAPE_IN_LISTS", RE_BACKSLASH_ESCAPE_IN_LISTS },
+ { "BK_PLUS_QM", RE_BK_PLUS_QM },
+ { "CHAR_CLASSES", RE_CHAR_CLASSES },
+ { "CONTEXT_INDEP_ANCHORS", RE_CONTEXT_INDEP_ANCHORS },
+ { "CONTEXT_INDEP_OPS", RE_CONTEXT_INDEP_OPS },
+ { "CONTEXT_INVALID_OPS", RE_CONTEXT_INVALID_OPS },
+ { "DOT_NEWLINE", RE_DOT_NEWLINE },
+ { "DOT_NOT_NULL", RE_DOT_NOT_NULL },
+ { "HAT_LISTS_NOT_NEWLINE", RE_HAT_LISTS_NOT_NEWLINE },
+ { "INTERVALS", RE_INTERVALS },
+ { "LIMITED_OPS", RE_LIMITED_OPS },
+ { "NEWLINE_ALT", RE_NEWLINE_ALT },
+ { "NO_BK_BRACES", RE_NO_BK_BRACES },
+ { "NO_BK_PARENS", RE_NO_BK_PARENS },
+ { "NO_BK_REFS", RE_NO_BK_REFS },
+ { "NO_BK_VBAR", RE_NO_BK_VBAR },
+ { "NO_EMPTY_RANGES", RE_NO_EMPTY_RANGES },
+ { "UNMATCHED_RIGHT_PAREN_ORD", RE_UNMATCHED_RIGHT_PAREN_ORD },
+ { "NO_POSIX_BACKTRACKING", RE_NO_POSIX_BACKTRACKING },
+ { "NO_GNU_OPS", RE_NO_GNU_OPS },
+ { "DEBUG", RE_DEBUG },
+ { "INVALID_INTERVAL_ORD", RE_INVALID_INTERVAL_ORD },
+ { "ICASE", RE_ICASE },
+ { "CARET_ANCHORS_HERE", RE_CARET_ANCHORS_HERE },
+ { "CONTEXT_INVALID_DUP", RE_CONTEXT_INVALID_DUP },
+ { "NO_SUB", RE_NO_SUB },
+#ifdef RE_PLAIN
+ { "PLAIN", RE_PLAIN },
+#endif
/*---------------------------------------------------------------------------*/
{ NULL, 0 }
};
static int Gnu_get_flags (lua_State *L) {
- const flag_pair* fps[] = { gnu_flags, NULL };
+ const flag_pair* fps[] = { gnu_flags, gnu_syntax_flags, NULL };
return get_flags (L, fps);
}
@@ -309,15 +298,12 @@ static const luaL_reg rexlib[] = {
{ "new", ud_new },
{ "flags", Gnu_get_flags },
{ "plainfind", plainfind_func },
- { "setsyntax", LGnu_setsyntax },
{ NULL, NULL }
};
/* Open the library */
REX_API int REX_OPENLIB (lua_State *L)
{
- re_set_syntax (RE_SYNTAX_POSIX_EXTENDED);
-
/* create a new function environment to serve as a metatable for methods */
lua_newtable (L);
lua_pushvalue (L, -1);
diff --git a/test/emacs_sets.lua b/test/emacs_sets.lua
index 19f0a3b..fb2b024 100644
--- a/test/emacs_sets.lua
+++ b/test/emacs_sets.lua
@@ -12,7 +12,7 @@ local function set_f_gmatch (lib, flg)
-- gmatch (s, p, [cf], [ef])
local function test_gmatch (subj, patt)
local out, guard = {}, 10
- for a, b in lib.gmatch (subj, patt, nil, nil, nil, "EMACS") do
+ for a, b in lib.gmatch (subj, patt, flg.SYNTAX_EMACS, nil) do
table.insert (out, { norm(a), norm(b) })
guard = guard - 1
if guard == 0 then break end
@@ -31,7 +31,7 @@ local function set_f_split (lib, flg)
-- split (s, p, [cf], [ef])
local function test_split (subj, patt)
local out, guard = {}, 10
- for a, b, c in lib.split (subj, patt, nil, nil, nil, "EMACS") do
+ for a, b, c in lib.split (subj, patt, flg.SYNTAX_EMACS, nil) do
table.insert (out, { norm(a), norm(b), norm(c) })
guard = guard - 1
if guard == 0 then break end
@@ -48,8 +48,9 @@ end
return function (libname)
local lib = require (libname)
+ local flags = lib.flags ()
return {
- set_f_gmatch (lib),
- set_f_split (lib),
+ set_f_gmatch (lib, flags),
+ set_f_split (lib, flags),
}
end