diff options
-rw-r--r-- | doc/manual.txt | 39 | ||||
-rw-r--r-- | src/gnu/lgnu.c | 150 | ||||
-rw-r--r-- | test/emacs_sets.lua | 9 |
3 files changed, 83 insertions, 115 deletions
diff --git a/doc/manual.txt b/doc/manual.txt index 0237b44..cda6f26 100644 --- a/doc/manual.txt +++ b/doc/manual.txt @@ -399,11 +399,12 @@ constants in the used library. They are formed as follows: but for ONIG_OPTION_xxx constants, alias strings are created additionally, e.g., the value of ONIG_OPTION_IGNORECASE constant becomes accessible via either of two keys: ``"ONIG_OPTION_IGNORECASE"`` and ``"IGNORECASE"``. -* **GNU**: the GNU library provides the flags ``not_bol``, which stops - a beginning-of-line anchor from matching at the start of a string, - ``not_eol``, which stops an end-of-line anchor from matching at the - end of a string, and ``reverse`` which causes the search to be - performed backwards. +* **GNU**: the GNU library provides the flags ``not_bol``, which stops a + beginning-of-line anchor from matching at the start of a string, ``not_eol``, + which stops an end-of-line anchor from matching at the end of a string, and + ``backward`` which causes the search to be performed backwards, as well as the + RE_xxx syntax specifiers (as defined in regex.h), omitting the RE\_ prefix. + For example, RE_SYNTAX_GREP becomes ``SYNTAX_GREP`` in Lua. ------------------------------------------------------------ @@ -622,37 +623,17 @@ GNU-only functions and methods new --- -:funcdef:`rex.new (patt, [cf], [syn], [tr])` +:funcdef:`rex.new (patt, [cf], [tr])` -The *syntax* parameter (*syn*) must be one of the predefined strings that are -formed from the RE_SYNTAX_xxx identifiers defined in regex.h, by means of -omitting the RE_SYNTAX\_ part. For example, RE_SYNTAX_GREP becomes ``"GREP"`` on -the Lua side. The default value, used when the parameter is not supplied or -``nil``, is either ``"POSIX_EXTENDED"`` (at start-up), or the value set by the -last setsyntax_ call. +If the compilation flags (*cf*) are not supplied or ``nil``, the default syntax +is ``SYNTAX_POSIX_EXTENDED``. Note that this is not the same as passing a value +of zero, which is the same as ``SYNTAX_EMACS``. The *translation* parameter (*tr*) is a map of eight-bit character codes (0 to 255 inclusive) to 8-bit characters (strings). If this parameter is given, the pattern is translated at compilation time, and each string to be matched is translated when it is being matched. -setsyntax ---------- - -:funcdef:`rex_gnu.setsyntax (syntax)` - -This function sets the default syntax for the GNU library (see the ``new`` -method above for the interpretation of the *syntax* parameter). The specified -syntax will be further used for compiling string regex patterns by all relevant -functions, unless the *syn* argument is passed to those functions explicitly. - -**Returns:** nothing - -**Examples:** - - 1. ``rex_gnu.setsyntax ("POSIX") -- use POSIX regex syntax as the default`` - 2. ``rex_gnu.setsyntax ("EMACS") -- use Emacs regex syntax as the default`` - Oniguruma-only functions and methods ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/gnu/lgnu.c b/src/gnu/lgnu.c index 1f9d76b..82e37fb 100644 --- a/src/gnu/lgnu.c +++ b/src/gnu/lgnu.c @@ -27,13 +27,13 @@ #define REX_TYPENAME REX_LIBNAME"_regex" -#define ALG_CFLAGS_DFLT 0 +#define ALG_CFLAGS_DFLT RE_SYNTAX_POSIX_EXTENDED #define ALG_EFLAGS_DFLT 0 -#define ALG_GETCFLAGS(L,pos) ALG_CFLAGS_DFLT +#define ALG_GETCFLAGS(L,pos) luaL_optint(L, pos, ALG_CFLAGS_DFLT) -static void checkarg_compile (lua_State *L, int pos, TArgComp *argC); -#define ALG_GETCARGS(a,b,c) checkarg_compile(a,b,c) +static const unsigned char *gettranslate (lua_State *L, int pos); +#define ALG_GETCARGS(L,pos,argC) argC->translate = gettranslate (L, pos) #define ALG_NOMATCH(res) ((res) == -1 || (res) == -2) #define ALG_ISMATCH(res) ((res) >= 0) @@ -72,9 +72,9 @@ typedef struct { */ /* Execution flags, which we need to simulate as GNU does not use flags for this. */ -#define GNU_NOTBOL 1 -#define GNU_NOTEOL 2 -#define GNU_REVERSE 4 +#define GNU_NOTBOL 1 +#define GNU_NOTEOL 2 +#define GNU_BACKWARD 4 static int generate_error (lua_State *L, const TUserdata *ud, int errcode) { const char *errmsg; @@ -114,78 +114,20 @@ static const unsigned char *gettranslate (lua_State *L, int pos) { return translate; } -typedef struct { - const char * name; - int value; -} EncPair; - -/* ATTENTION: - This array must always be kept alphabetically sorted, as it's used in the - binary search, so take care when manually inserting new elements. - */ -static EncPair Syntaxes[] = { - { "AWK", RE_SYNTAX_AWK }, - { "ED", RE_SYNTAX_ED }, - { "EGREP", RE_SYNTAX_EGREP }, - { "EMACS", RE_SYNTAX_EMACS }, - { "GNU_AWK", RE_SYNTAX_GNU_AWK }, - { "GREP", RE_SYNTAX_GREP }, - { "POSIX_AWK", RE_SYNTAX_POSIX_AWK }, - { "POSIX_BASIC", RE_SYNTAX_POSIX_BASIC }, - { "POSIX_EGREP", RE_SYNTAX_POSIX_EGREP }, - { "POSIX_EXTENDED", RE_SYNTAX_POSIX_EXTENDED }, - { "POSIX_MINIMAL_BASIC", RE_SYNTAX_POSIX_MINIMAL_BASIC }, - { "POSIX_MINIMAL_EXTENDED", RE_SYNTAX_POSIX_MINIMAL_EXTENDED }, - { "SED", RE_SYNTAX_SED }, -}; - -static int fcmp (const void *p1, const void *p2) { - return strcmp (((EncPair*) p1)->name, ((EncPair*) p2)->name); -} - -static int getsyntax (lua_State *L, int pos) { - EncPair key, *found; - if ((key.name = luaL_optstring (L, pos, NULL)) == NULL) - return -1; - found = (EncPair*) bsearch (&key, Syntaxes, sizeof (Syntaxes) / sizeof (EncPair), - sizeof (EncPair), fcmp); - if (found == NULL) - luaL_argerror (L, pos, "invalid or unsupported syntax string"); - return found->value; -} - -static void checkarg_compile (lua_State *L, int pos, TArgComp *argC) { - argC->translate = gettranslate (L, pos); - argC->gnusyn = getsyntax (L, pos + 1); -} - static void seteflags (TGnu *ud, TArgExec *argE) { ud->r.not_bol = (argE->eflags & GNU_NOTBOL) != 0; ud->r.not_eol = (argE->eflags & GNU_NOTEOL) != 0; } -/* - rex.setsyntax (syntax) - @param syntax: one of the predefined strings listed in array 'Syntaxes' - @return: nothing -*/ -static int LGnu_setsyntax (lua_State *L) { - (void) luaL_checkstring (L, 1); - re_set_syntax (getsyntax (L, 1)); - return 0; -} - static int compile_regex (lua_State *L, const TArgComp *argC, TGnu **pud) { const char *res; TGnu *ud; - reg_syntax_t old_syntax = 0; int ret; ud = (TGnu *)lua_newuserdata (L, sizeof (TGnu)); memset (ud, 0, sizeof (TGnu)); /* initialize all members to 0 */ - if (argC->gnusyn >= 0) - old_syntax = re_set_syntax (argC->gnusyn); + re_set_syntax (argC->cflags); /* translate table is never written to, so this cast is safe */ ud->r.translate = (unsigned char *) argC->translate; @@ -195,9 +137,6 @@ static int compile_regex (lua_State *L, const TArgComp *argC, TGnu **pud) { ud->errmsg = res; ret = generate_error (L, ud, 0); } else { - if (argC->cflags & REG_NOSUB) - ud->r.no_sub = 1; - lua_pushvalue (L, LUA_ENVIRONINDEX); lua_setmetatable (L, -2); @@ -205,8 +144,6 @@ static int compile_regex (lua_State *L, const TArgComp *argC, TGnu **pud) { ret = 1; } - if (argC->gnusyn >= 0) - re_set_syntax (old_syntax); return ret; } @@ -216,7 +153,7 @@ static int gmatch_exec (TUserdata *ud, TArgExec *argE) { ud->r.not_bol = 1; argE->text += argE->startoffset; argE->textlen -= argE->startoffset; - if (argE->eflags & GNU_REVERSE) + if (argE->eflags & GNU_BACKWARD) return re_search (&ud->r, argE->text, argE->textlen, argE->textlen, -argE->textlen, &ud->match); else return re_search (&ud->r, argE->text, argE->textlen, 0, argE->textlen, &ud->match); @@ -230,7 +167,7 @@ static int findmatch_exec (TGnu *ud, TArgExec *argE) { argE->text += argE->startoffset; argE->textlen -= argE->startoffset; seteflags (ud, argE); - if (argE->eflags & GNU_REVERSE) + if (argE->eflags & GNU_BACKWARD) return re_search (&ud->r, argE->text, argE->textlen, argE->textlen, -argE->textlen, &ud->match); else return re_search (&ud->r, argE->text, argE->textlen, 0, argE->textlen, &ud->match); @@ -240,7 +177,7 @@ static int gsub_exec (TGnu *ud, TArgExec *argE, int st) { seteflags (ud, argE); if (st > 0) ud->r.not_bol = 1; - if (argE->eflags & GNU_REVERSE) + if (argE->eflags & GNU_BACKWARD) return re_search (&ud->r, argE->text + st, argE->textlen - st, argE->textlen - st, -(argE->textlen - st), &ud->match); else return re_search (&ud->r, argE->text + st, argE->textlen - st, 0, argE->textlen - st, &ud->match); @@ -250,7 +187,7 @@ static int split_exec (TGnu *ud, TArgExec *argE, int offset) { seteflags (ud, argE); if (offset > 0) ud->r.not_bol = 1; - if (argE->eflags & GNU_REVERSE) + if (argE->eflags & GNU_BACKWARD) return re_search (&ud->r, argE->text + offset, argE->textlen - offset, argE->textlen - offset, -(argE->textlen - offset), &ud->match); else return re_search (&ud->r, argE->text + offset, argE->textlen - offset, 0, argE->textlen - offset, &ud->match); @@ -278,15 +215,67 @@ static int Gnu_tostring (lua_State *L) { static flag_pair gnu_flags[] = { - { "not_bol", GNU_NOTBOL }, - { "not_eol", GNU_NOTEOL }, - { "reverse", GNU_REVERSE }, + { "not_bol", GNU_NOTBOL }, + { "not_eol", GNU_NOTEOL }, + { "backward", GNU_BACKWARD }, +/*---------------------------------------------------------------------------*/ + { NULL, 0 } +}; + +static flag_pair gnu_syntax_flags[] = { + /* Syntax flag sets. */ + { "SYNTAX_EMACS", RE_SYNTAX_EMACS }, + { "SYNTAX_AWK", RE_SYNTAX_AWK }, + { "SYNTAX_GNU_AWK", RE_SYNTAX_GNU_AWK }, + { "SYNTAX_POSIX_AWK", RE_SYNTAX_POSIX_AWK }, + { "SYNTAX_POSIX_AWK", RE_SYNTAX_POSIX_AWK }, + { "SYNTAX_EGREP", RE_SYNTAX_EGREP }, + { "SYNTAX_POSIX_EGREP", RE_SYNTAX_POSIX_EGREP }, + { "SYNTAX_ED", RE_SYNTAX_ED }, + { "SYNTAX_SED", RE_SYNTAX_SED }, + { "SYNTAX_POSIX_AWK", RE_SYNTAX_POSIX_AWK }, + { "SYNTAX_GREP", RE_SYNTAX_GREP }, + { "SYNTAX_POSIX_BASIC", RE_SYNTAX_POSIX_BASIC }, + { "SYNTAX_POSIX_MINIMAL_BASIC", RE_SYNTAX_POSIX_MINIMAL_BASIC }, + { "SYNTAX_POSIX_EXTENDED", RE_SYNTAX_POSIX_EXTENDED }, + { "SYNTAX_POSIX_MINIMAL_EXTENDED", RE_SYNTAX_POSIX_MINIMAL_EXTENDED }, + + /* Individual syntax flags. */ + { "BACKSLASH_ESCAPE_IN_LISTS", RE_BACKSLASH_ESCAPE_IN_LISTS }, + { "BK_PLUS_QM", RE_BK_PLUS_QM }, + { "CHAR_CLASSES", RE_CHAR_CLASSES }, + { "CONTEXT_INDEP_ANCHORS", RE_CONTEXT_INDEP_ANCHORS }, + { "CONTEXT_INDEP_OPS", RE_CONTEXT_INDEP_OPS }, + { "CONTEXT_INVALID_OPS", RE_CONTEXT_INVALID_OPS }, + { "DOT_NEWLINE", RE_DOT_NEWLINE }, + { "DOT_NOT_NULL", RE_DOT_NOT_NULL }, + { "HAT_LISTS_NOT_NEWLINE", RE_HAT_LISTS_NOT_NEWLINE }, + { "INTERVALS", RE_INTERVALS }, + { "LIMITED_OPS", RE_LIMITED_OPS }, + { "NEWLINE_ALT", RE_NEWLINE_ALT }, + { "NO_BK_BRACES", RE_NO_BK_BRACES }, + { "NO_BK_PARENS", RE_NO_BK_PARENS }, + { "NO_BK_REFS", RE_NO_BK_REFS }, + { "NO_BK_VBAR", RE_NO_BK_VBAR }, + { "NO_EMPTY_RANGES", RE_NO_EMPTY_RANGES }, + { "UNMATCHED_RIGHT_PAREN_ORD", RE_UNMATCHED_RIGHT_PAREN_ORD }, + { "NO_POSIX_BACKTRACKING", RE_NO_POSIX_BACKTRACKING }, + { "NO_GNU_OPS", RE_NO_GNU_OPS }, + { "DEBUG", RE_DEBUG }, + { "INVALID_INTERVAL_ORD", RE_INVALID_INTERVAL_ORD }, + { "ICASE", RE_ICASE }, + { "CARET_ANCHORS_HERE", RE_CARET_ANCHORS_HERE }, + { "CONTEXT_INVALID_DUP", RE_CONTEXT_INVALID_DUP }, + { "NO_SUB", RE_NO_SUB }, +#ifdef RE_PLAIN + { "PLAIN", RE_PLAIN }, +#endif /*---------------------------------------------------------------------------*/ { NULL, 0 } }; static int Gnu_get_flags (lua_State *L) { - const flag_pair* fps[] = { gnu_flags, NULL }; + const flag_pair* fps[] = { gnu_flags, gnu_syntax_flags, NULL }; return get_flags (L, fps); } @@ -309,15 +298,12 @@ static const luaL_reg rexlib[] = { { "new", ud_new }, { "flags", Gnu_get_flags }, { "plainfind", plainfind_func }, - { "setsyntax", LGnu_setsyntax }, { NULL, NULL } }; /* Open the library */ REX_API int REX_OPENLIB (lua_State *L) { - re_set_syntax (RE_SYNTAX_POSIX_EXTENDED); - /* create a new function environment to serve as a metatable for methods */ lua_newtable (L); lua_pushvalue (L, -1); diff --git a/test/emacs_sets.lua b/test/emacs_sets.lua index 19f0a3b..fb2b024 100644 --- a/test/emacs_sets.lua +++ b/test/emacs_sets.lua @@ -12,7 +12,7 @@ local function set_f_gmatch (lib, flg) -- gmatch (s, p, [cf], [ef]) local function test_gmatch (subj, patt) local out, guard = {}, 10 - for a, b in lib.gmatch (subj, patt, nil, nil, nil, "EMACS") do + for a, b in lib.gmatch (subj, patt, flg.SYNTAX_EMACS, nil) do table.insert (out, { norm(a), norm(b) }) guard = guard - 1 if guard == 0 then break end @@ -31,7 +31,7 @@ local function set_f_split (lib, flg) -- split (s, p, [cf], [ef]) local function test_split (subj, patt) local out, guard = {}, 10 - for a, b, c in lib.split (subj, patt, nil, nil, nil, "EMACS") do + for a, b, c in lib.split (subj, patt, flg.SYNTAX_EMACS, nil) do table.insert (out, { norm(a), norm(b), norm(c) }) guard = guard - 1 if guard == 0 then break end @@ -48,8 +48,9 @@ end return function (libname) local lib = require (libname) + local flags = lib.flags () return { - set_f_gmatch (lib), - set_f_split (lib), + set_f_gmatch (lib, flags), + set_f_split (lib, flags), } end |