diff options
author | shmuz <shmuz> | 2008-07-30 17:35:10 +0000 |
---|---|---|
committer | shmuz <shmuz> | 2008-07-30 17:35:10 +0000 |
commit | 357e3817481bba56eca4c5e7e844b515f6605c49 (patch) | |
tree | 79a86a08ec4c4e1c299a1fdbaf07dea9f2c8136d /src | |
parent | 09205eea30e1dfa3c8c309dd221ae1ab3c2590a8 (diff) | |
download | lrexlib-357e3817481bba56eca4c5e7e844b515f6605c49.tar.gz |
1) Moving files: each binding to its dedicated directory.
2) Oniguruma binding added.
Diffstat (limited to 'src')
-rwxr-xr-x | src/oniguruma/lonig.c | 368 | ||||
-rwxr-xr-x | src/oniguruma/lonig_f.c | 204 | ||||
-rwxr-xr-x | src/oniguruma/rex_onig.mak | 38 | ||||
-rwxr-xr-x | src/pcre/lpcre.c | 439 | ||||
-rwxr-xr-x | src/pcre/lpcre_f.c | 189 | ||||
-rwxr-xr-x | src/pcre/rex_pcre.mak | 38 | ||||
-rwxr-xr-x | src/posix/lposix.c | 295 | ||||
-rwxr-xr-x | src/posix/rex_posix.mak | 48 | ||||
-rwxr-xr-x | src/tre/ltre.c | 358 | ||||
-rwxr-xr-x | src/tre/rex_tre.mak | 42 |
10 files changed, 2019 insertions, 0 deletions
diff --git a/src/oniguruma/lonig.c b/src/oniguruma/lonig.c new file mode 100755 index 0000000..c9f16b0 --- /dev/null +++ b/src/oniguruma/lonig.c @@ -0,0 +1,368 @@ +/* lonig.c - Lua binding of Oniguruma library */ +/* See Copyright Notice in the file LICENSE */ + +#include <stdlib.h> +#include <string.h> +#include <locale.h> +#include <ctype.h> +#include <oniguruma/oniguruma.h> + +#include "lua.h" +#include "lauxlib.h" +#include "../common.h" + +extern int LOnig_get_flags (lua_State *L); + +/* These 2 settings may be redefined from the command-line or the makefile. + * They should be kept in sync between themselves and with the target name. + */ +#ifndef REX_LIBNAME +# define REX_LIBNAME "rex_onig" +#endif +#ifndef REX_OPENLIB +# define REX_OPENLIB luaopen_rex_onig +#endif + +#define REX_TYPENAME REX_LIBNAME"_regex" + +#define ALG_CFLAGS_DFLT ONIG_OPTION_NONE +#define ALG_EFLAGS_DFLT 0 + +static int getcflags (lua_State *L, int pos); +#define ALG_GETCFLAGS(L,pos) getcflags(L, pos) + +static void optlocale (TArgComp *argC, lua_State *L, int pos); +#define ALG_OPTLOCALE(a,b,c) optlocale(a,b,c) + +static void optsyntax (TArgComp *argC, lua_State *L, int pos); +#define ALG_OPTSYNTAX(a,b,c) optsyntax(a,b,c) + +#define ALG_NOMATCH ONIG_MISMATCH +#define ALG_ISMATCH(res) ((res) >= 0) +#define ALG_SUBBEG(ud,n) ud->region->beg[n] +#define ALG_SUBEND(ud,n) ud->region->end[n] +#define ALG_SUBLEN(ud,n) (ALG_SUBEND(ud,n) - ALG_SUBBEG(ud,n)) +#define ALG_SUBVALID(ud,n) (ALG_SUBBEG(ud,n) >= 0) +#define ALG_NSUB(ud) onig_number_of_captures(ud->reg) + +#define ALG_PUSHSUB(L,ud,text,n) \ + lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n)) + +#define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \ + (ALG_SUBVALID(ud,n) ? 
ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) + +#define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1) +#define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n)) +#define ALG_PUSHOFFSETS(L,ud,offs,n) \ + (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n)) + +#define ALG_BASE(st) 0 +#define ALG_PULL +/* #define ALG_USERETRY */ + +typedef struct { + regex_t *reg; + OnigRegion *region; + OnigErrorInfo einfo; +} TOnig; + +#define TUserdata TOnig + +static void do_named_subpatterns (lua_State *L, TOnig *ud, const char *text); +# define DO_NAMED_SUBPATTERNS do_named_subpatterns + +#include "../algo.h" + +/* Functions + ****************************************************************************** + */ + +static int getcflags (lua_State *L, int pos) { + switch (lua_type (L, pos)) { + case LUA_TNONE: + case LUA_TNIL: + return ALG_CFLAGS_DFLT; + case LUA_TNUMBER: + return lua_tointeger (L, pos); + case LUA_TSTRING: { + const char *s = lua_tostring (L, pos); + int res = 0, ch; + while ((ch = *s++) != '\0') { + if (ch == 'i') res |= ONIG_OPTION_IGNORECASE; + else if (ch == 'm') res |= ONIG_OPTION_NEGATE_SINGLELINE; + else if (ch == 's') res |= ONIG_OPTION_MULTILINE; + else if (ch == 'x') res |= ONIG_OPTION_EXTEND; + /* else if (ch == 'U') res |= PCRE_UNGREEDY; */ + /* else if (ch == 'X') res |= PCRE_EXTRA; */ + } + return res; + } + default: + return luaL_typerror (L, pos, "number or string"); + } +} + +static int generate_error (lua_State *L, const TOnig *ud, int errcode) { + char buf [ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(buf, errcode, ud->einfo); + return luaL_error(L, buf); +} + +typedef struct { + const char * name; + void * value; +} EncPair; + +/* ATTENTION: + This array must always be kept alphabetically sorted, as it's used in the + binary search, so take care when manually inserting new elements. 
+ */ +static EncPair Encodings[] = { + { "ASCII", ONIG_ENCODING_ASCII }, + { "BIG5", ONIG_ENCODING_BIG5 }, + { "CP1251", ONIG_ENCODING_CP1251 }, + { "EUC_CN", ONIG_ENCODING_EUC_CN }, + { "EUC_JP", ONIG_ENCODING_EUC_JP }, + { "EUC_KR", ONIG_ENCODING_EUC_KR }, + { "EUC_TW", ONIG_ENCODING_EUC_TW }, + { "GB18030", ONIG_ENCODING_GB18030 }, + { "ISO_8859_1", ONIG_ENCODING_ISO_8859_1 }, + { "ISO_8859_10", ONIG_ENCODING_ISO_8859_10 }, + { "ISO_8859_11", ONIG_ENCODING_ISO_8859_11 }, + { "ISO_8859_13", ONIG_ENCODING_ISO_8859_13 }, + { "ISO_8859_14", ONIG_ENCODING_ISO_8859_14 }, + { "ISO_8859_15", ONIG_ENCODING_ISO_8859_15 }, + { "ISO_8859_16", ONIG_ENCODING_ISO_8859_16 }, + { "ISO_8859_2", ONIG_ENCODING_ISO_8859_2 }, + { "ISO_8859_3", ONIG_ENCODING_ISO_8859_3 }, + { "ISO_8859_4", ONIG_ENCODING_ISO_8859_4 }, + { "ISO_8859_5", ONIG_ENCODING_ISO_8859_5 }, + { "ISO_8859_6", ONIG_ENCODING_ISO_8859_6 }, + { "ISO_8859_7", ONIG_ENCODING_ISO_8859_7 }, + { "ISO_8859_8", ONIG_ENCODING_ISO_8859_8 }, + { "ISO_8859_9", ONIG_ENCODING_ISO_8859_9 }, +/*{ "KOI8", ONIG_ENCODING_KOI8 }, */ + { "KOI8_R", ONIG_ENCODING_KOI8_R }, + { "SJIS", ONIG_ENCODING_SJIS }, + { "UNDEF", ONIG_ENCODING_UNDEF }, + { "UTF16_BE", ONIG_ENCODING_UTF16_BE }, + { "UTF16_LE", ONIG_ENCODING_UTF16_LE }, + { "UTF32_BE", ONIG_ENCODING_UTF32_BE }, + { "UTF32_LE", ONIG_ENCODING_UTF32_LE }, + { "UTF8", ONIG_ENCODING_UTF8 }, +}; + +/* ATTENTION: + This array must always be kept alphabetically sorted, as it's used in the + binary search, so take care when manually inserting new elements. 
+ */ +static EncPair Syntaxes[] = { + { "ASIS", ONIG_SYNTAX_ASIS }, +/*{ "DEFAULT", ONIG_SYNTAX_DEFAULT }, */ + { "EMACS", ONIG_SYNTAX_EMACS }, + { "GNU_REGEX", ONIG_SYNTAX_GNU_REGEX }, + { "GREP", ONIG_SYNTAX_GREP }, + { "JAVA", ONIG_SYNTAX_JAVA }, + { "PERL", ONIG_SYNTAX_PERL }, + { "PERL_NG", ONIG_SYNTAX_PERL_NG }, + { "POSIX_BASIC", ONIG_SYNTAX_POSIX_BASIC }, + { "POSIX_EXTENDED", ONIG_SYNTAX_POSIX_EXTENDED }, + { "RUBY", ONIG_SYNTAX_RUBY }, +}; + +static int _cdecl fcmp(const void *p1, const void *p2) { + return stricmp(((EncPair*)p1)->name, ((EncPair*)p2)->name); +} + +static void optlocale (TArgComp *argC, lua_State *L, int pos) { + EncPair key; + if ((key.name = luaL_optstring(L, pos, NULL)) == NULL) + argC->locale = (const char*)ONIG_ENCODING_ASCII; + else { + EncPair *pair = (EncPair*) bsearch(&key, Encodings, + sizeof(Encodings)/sizeof(EncPair), sizeof(EncPair), fcmp); + if (pair != NULL) + argC->locale = (const char*)pair->value; + else + luaL_argerror(L, pos, "invalid or unsupported encoding string"); + } +} + +static void *getsyntax (lua_State *L, int pos) { + EncPair key, *found; + if ((key.name = luaL_optstring(L, pos, NULL)) == NULL) + return ONIG_SYNTAX_DEFAULT; + found = (EncPair*) bsearch(&key, Syntaxes, sizeof(Syntaxes)/sizeof(EncPair), + sizeof(EncPair), fcmp); + if (found == NULL) + luaL_argerror(L, pos, "invalid or unsupported syntax string"); + return found->value; +} + +static void optsyntax (TArgComp *argC, lua_State *L, int pos) { + argC->syntax = getsyntax(L, pos); +} + +/* + rex.setdefaultsyntax (syntax) + @param syntax: one of the predefined strings listed in array 'Syntaxes' + @return: nothing +*/ +static int LOnig_setdefaultsyntax (lua_State *L) { + (void)luaL_checkstring(L, 1); + onig_set_default_syntax(getsyntax(L, 1)); + return 0; +} + +static int compile_regex (lua_State *L, const TArgComp *argC, TOnig **pud) { + TOnig *ud; + int r; + + ud = (TOnig*)lua_newuserdata (L, sizeof (TOnig)); + memset (ud, 0, sizeof (TOnig)); /* 
initialize all members to 0 */ + lua_pushvalue (L, LUA_ENVIRONINDEX); + lua_setmetatable (L, -2); + + r = onig_new(&ud->reg, argC->pattern, argC->pattern + argC->patlen, + argC->cflags, (OnigEncoding)argC->locale, (OnigSyntaxType*)argC->syntax, + &ud->einfo); + if (r != ONIG_NORMAL) + return generate_error(L, ud, r); + + if ((ud->region = onig_region_new()) == NULL) + return luaL_error(L, "`onig_region_new' failed"); + + if (pud) *pud = ud; + return 1; +} + +typedef struct { + lua_State *L; + TOnig *ud; + const char *text; +} TNameArg; + +static int name_callback (const UChar *name, const UChar *name_end, + int ngroups, int *groupnumlist, regex_t *reg, void *arg) +{ + (void) ngroups; + (void) groupnumlist; + TNameArg *A = (TNameArg*)arg; + int num = onig_name_to_backref_number(reg, name, name_end, A->ud->region); + lua_pushlstring (A->L, (const char*)name, name_end - name); + ALG_PUSHSUB_OR_FALSE (A->L, A->ud, A->text, num); + lua_rawset (A->L, -3); + return 0; +} + +/* the target table must be on lua stack top */ +static void do_named_subpatterns (lua_State *L, TOnig *ud, const char *text) { + if (onig_number_of_names (ud->reg) > 0) { + TNameArg A = { L, ud, text }; + onig_foreach_name(ud->reg, name_callback, &A); + } +} + +static int findmatch_exec (TUserdata *ud, TArgExec *argE) { + const char *end = argE->text + argE->textlen; + onig_region_clear(ud->region); + return onig_search (ud->reg, argE->text, end, argE->text + argE->startoffset, + end, ud->region, argE->eflags); +} + +static void gmatch_pushsubject (lua_State *L, TArgExec *argE) { + lua_pushlstring (L, argE->text, argE->textlen); +} + +static int gmatch_exec (TOnig *ud, TArgExec *argE) { + return findmatch_exec(ud, argE); +} + +static int gsub_exec (TOnig *ud, TArgExec *argE, int st) { + const char *end = argE->text + argE->textlen; + onig_region_clear(ud->region); + return onig_search (ud->reg, argE->text, end, argE->text + st, + end, ud->region, argE->eflags); +} + +static int split_exec (TOnig *ud, 
TArgExec *argE, int st) { + return gsub_exec(ud, argE, st); +} + +static int LOnig_gc (lua_State *L) { + TOnig *ud = check_ud (L); + if (ud->reg) { /* precaution against "manual" __gc calling */ + onig_free (ud->reg); + ud->reg = NULL; + } + if (ud->region) { + onig_region_free (ud->region, 1); + ud->region = NULL; + } + return 0; +} + +static int LOnig_tostring (lua_State *L) { + TOnig *ud = check_ud (L); + if (ud->reg) + lua_pushfstring (L, "%s (%p)", REX_TYPENAME, (void*)ud); + else + lua_pushfstring (L, "%s (deleted)", REX_TYPENAME); + return 1; +} + +static int LOnig_version (lua_State *L) { + lua_pushstring (L, onig_version ()); + return 1; +} + +static const luaL_reg regex_meta[] = { + { "exec", ud_exec }, + { "tfind", ud_tfind }, /* old name: match */ + { "find", ud_find }, + { "match", ud_match }, + { "__gc", LOnig_gc }, + { "__tostring", LOnig_tostring }, + { NULL, NULL } +}; + +static const luaL_reg rexlib[] = { + { "match", match }, + { "find", find }, + { "gmatch", gmatch }, + { "gsub", gsub }, + { "split", split }, + { "new", ud_new }, + { "plainfind", plainfind_func }, + { "flags", LOnig_get_flags }, + { "version", LOnig_version }, + { "setdefaultsyntax", LOnig_setdefaultsyntax }, + { NULL, NULL } +}; + +/* Open the library */ +REX_API int REX_OPENLIB (lua_State *L) { + if (ONIGURUMA_VERSION_MAJOR > atoi (onig_version ())) { + return luaL_error (L, "%s requires at least version %d of Oniguruma library", + REX_LIBNAME, (int)ONIGURUMA_VERSION_MAJOR); + } + + onig_init(); + onig_set_default_syntax(ONIG_SYNTAX_RUBY); + + /* create a new function environment to serve as a metatable for methods */ + lua_newtable (L); + lua_pushvalue (L, -1); + lua_replace (L, LUA_ENVIRONINDEX); + lua_pushvalue(L, -1); /* mt.__index = mt */ + lua_setfield(L, -2, "__index"); + luaL_register (L, NULL, regex_meta); + + /* register functions */ + luaL_register (L, REX_LIBNAME, rexlib); + lua_pushliteral (L, REX_VERSION" (for Oniguruma)"); + lua_setfield (L, -2, "_VERSION"); + + 
return 1; +} + diff --git a/src/oniguruma/lonig_f.c b/src/oniguruma/lonig_f.c new file mode 100755 index 0000000..1b3bbce --- /dev/null +++ b/src/oniguruma/lonig_f.c @@ -0,0 +1,204 @@ +/* lonig_f.c - Lua binding of Oniguruma library */ +/* See Copyright Notice in the file LICENSE */ + +#include <oniguruma/oniguruma.h> +#include <lua.h> +#include <lauxlib.h> +#include "../common.h" + +static flag_pair onig_flags[] = { + { "ONIG_INFINITE_DISTANCE", ONIG_INFINITE_DISTANCE }, + { "ONIG_NREGION", ONIG_NREGION }, + { "ONIG_MAX_BACKREF_NUM", ONIG_MAX_BACKREF_NUM }, + { "ONIG_MAX_REPEAT_NUM", ONIG_MAX_REPEAT_NUM }, + { "ONIG_MAX_MULTI_BYTE_RANGES_NUM", ONIG_MAX_MULTI_BYTE_RANGES_NUM }, + { "ONIG_MAX_ERROR_MESSAGE_LEN", ONIG_MAX_ERROR_MESSAGE_LEN }, + { "ONIG_OPTION_DEFAULT", ONIG_OPTION_DEFAULT }, + { "DEFAULT", ONIG_OPTION_DEFAULT }, + { "ONIG_OPTION_NONE", ONIG_OPTION_NONE }, + { "NONE", ONIG_OPTION_NONE }, + { "ONIG_OPTION_IGNORECASE", ONIG_OPTION_IGNORECASE }, + { "IGNORECASE", ONIG_OPTION_IGNORECASE }, + { "ONIG_OPTION_EXTEND", ONIG_OPTION_EXTEND }, + { "EXTEND", ONIG_OPTION_EXTEND }, + { "ONIG_OPTION_MULTILINE", ONIG_OPTION_MULTILINE }, + { "MULTILINE", ONIG_OPTION_MULTILINE }, + { "ONIG_OPTION_SINGLELINE", ONIG_OPTION_SINGLELINE }, + { "SINGLELINE", ONIG_OPTION_SINGLELINE }, + { "ONIG_OPTION_FIND_LONGEST", ONIG_OPTION_FIND_LONGEST }, + { "FIND_LONGEST", ONIG_OPTION_FIND_LONGEST }, + { "ONIG_OPTION_FIND_NOT_EMPTY", ONIG_OPTION_FIND_NOT_EMPTY }, + { "FIND_NOT_EMPTY", ONIG_OPTION_FIND_NOT_EMPTY }, + { "ONIG_OPTION_NEGATE_SINGLELINE", ONIG_OPTION_NEGATE_SINGLELINE }, + { "NEGATE_SINGLELINE", ONIG_OPTION_NEGATE_SINGLELINE }, + { "ONIG_OPTION_DONT_CAPTURE_GROUP", ONIG_OPTION_DONT_CAPTURE_GROUP }, + { "DONT_CAPTURE_GROUP", ONIG_OPTION_DONT_CAPTURE_GROUP }, + { "ONIG_OPTION_CAPTURE_GROUP", ONIG_OPTION_CAPTURE_GROUP }, + { "CAPTURE_GROUP", ONIG_OPTION_CAPTURE_GROUP }, + { "ONIG_OPTION_NOTBOL", ONIG_OPTION_NOTBOL }, + { "NOTBOL", ONIG_OPTION_NOTBOL }, + { 
"ONIG_OPTION_NOTEOL", ONIG_OPTION_NOTEOL }, + { "NOTEOL", ONIG_OPTION_NOTEOL }, + { "ONIG_OPTION_POSIX_REGION", ONIG_OPTION_POSIX_REGION }, + { "POSIX_REGION", ONIG_OPTION_POSIX_REGION }, + { "ONIG_OPTION_MAXBIT", ONIG_OPTION_MAXBIT }, + { "MAXBIT", ONIG_OPTION_MAXBIT }, + { "ONIG_SYN_OP_VARIABLE_META_CHARACTERS", ONIG_SYN_OP_VARIABLE_META_CHARACTERS }, + { "ONIG_SYN_OP_DOT_ANYCHAR", ONIG_SYN_OP_DOT_ANYCHAR }, + { "ONIG_SYN_OP_ASTERISK_ZERO_INF", ONIG_SYN_OP_ASTERISK_ZERO_INF }, + { "ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF", ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF }, + { "ONIG_SYN_OP_PLUS_ONE_INF", ONIG_SYN_OP_PLUS_ONE_INF }, + { "ONIG_SYN_OP_ESC_PLUS_ONE_INF", ONIG_SYN_OP_ESC_PLUS_ONE_INF }, + { "ONIG_SYN_OP_QMARK_ZERO_ONE", ONIG_SYN_OP_QMARK_ZERO_ONE }, + { "ONIG_SYN_OP_ESC_QMARK_ZERO_ONE", ONIG_SYN_OP_ESC_QMARK_ZERO_ONE }, + { "ONIG_SYN_OP_BRACE_INTERVAL", ONIG_SYN_OP_BRACE_INTERVAL }, + { "ONIG_SYN_OP_ESC_BRACE_INTERVAL", ONIG_SYN_OP_ESC_BRACE_INTERVAL }, + { "ONIG_SYN_OP_VBAR_ALT", ONIG_SYN_OP_VBAR_ALT }, + { "ONIG_SYN_OP_ESC_VBAR_ALT", ONIG_SYN_OP_ESC_VBAR_ALT }, + { "ONIG_SYN_OP_LPAREN_SUBEXP", ONIG_SYN_OP_LPAREN_SUBEXP }, + { "ONIG_SYN_OP_ESC_LPAREN_SUBEXP", ONIG_SYN_OP_ESC_LPAREN_SUBEXP }, + { "ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR", ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR }, + { "ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR", ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR }, + { "ONIG_SYN_OP_DECIMAL_BACKREF", ONIG_SYN_OP_DECIMAL_BACKREF }, + { "ONIG_SYN_OP_BRACKET_CC", ONIG_SYN_OP_BRACKET_CC }, + { "ONIG_SYN_OP_ESC_W_WORD", ONIG_SYN_OP_ESC_W_WORD }, + { "ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END", ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END }, + { "ONIG_SYN_OP_ESC_B_WORD_BOUND", ONIG_SYN_OP_ESC_B_WORD_BOUND }, + { "ONIG_SYN_OP_ESC_S_WHITE_SPACE", ONIG_SYN_OP_ESC_S_WHITE_SPACE }, + { "ONIG_SYN_OP_ESC_D_DIGIT", ONIG_SYN_OP_ESC_D_DIGIT }, + { "ONIG_SYN_OP_LINE_ANCHOR", ONIG_SYN_OP_LINE_ANCHOR }, + { "ONIG_SYN_OP_POSIX_BRACKET", ONIG_SYN_OP_POSIX_BRACKET }, + { "ONIG_SYN_OP_QMARK_NON_GREEDY", 
ONIG_SYN_OP_QMARK_NON_GREEDY }, + { "ONIG_SYN_OP_ESC_CONTROL_CHARS", ONIG_SYN_OP_ESC_CONTROL_CHARS }, + { "ONIG_SYN_OP_ESC_C_CONTROL", ONIG_SYN_OP_ESC_C_CONTROL }, + { "ONIG_SYN_OP_ESC_OCTAL3", ONIG_SYN_OP_ESC_OCTAL3 }, + { "ONIG_SYN_OP_ESC_X_HEX2", ONIG_SYN_OP_ESC_X_HEX2 }, + { "ONIG_SYN_OP_ESC_X_BRACE_HEX8", ONIG_SYN_OP_ESC_X_BRACE_HEX8 }, + { "ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE", ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE }, + { "ONIG_SYN_OP2_QMARK_GROUP_EFFECT", ONIG_SYN_OP2_QMARK_GROUP_EFFECT }, + { "ONIG_SYN_OP2_OPTION_PERL", ONIG_SYN_OP2_OPTION_PERL }, + { "ONIG_SYN_OP2_OPTION_RUBY", ONIG_SYN_OP2_OPTION_RUBY }, + { "ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT", ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT }, + { "ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL", ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL }, + { "ONIG_SYN_OP2_CCLASS_SET_OP", ONIG_SYN_OP2_CCLASS_SET_OP }, + { "ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP", ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP }, + { "ONIG_SYN_OP2_ESC_K_NAMED_BACKREF", ONIG_SYN_OP2_ESC_K_NAMED_BACKREF }, + { "ONIG_SYN_OP2_ESC_G_SUBEXP_CALL", ONIG_SYN_OP2_ESC_G_SUBEXP_CALL }, + { "ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY", ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY }, + { "ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL", ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL }, + { "ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META", ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META }, + { "ONIG_SYN_OP2_ESC_V_VTAB", ONIG_SYN_OP2_ESC_V_VTAB }, + { "ONIG_SYN_OP2_ESC_U_HEX4", ONIG_SYN_OP2_ESC_U_HEX4 }, + { "ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR", ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR }, + { "ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY", ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY }, + { "ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT", ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT }, + { "ONIG_SYN_OP2_ESC_H_XDIGIT", ONIG_SYN_OP2_ESC_H_XDIGIT }, + { "ONIG_SYN_OP2_INEFFECTIVE_ESCAPE", ONIG_SYN_OP2_INEFFECTIVE_ESCAPE }, + { "ONIG_SYN_CONTEXT_INDEP_ANCHORS", ONIG_SYN_CONTEXT_INDEP_ANCHORS }, + { "ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS", ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS }, + { 
"ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS", ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS }, + { "ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP", ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP }, + { "ONIG_SYN_ALLOW_INVALID_INTERVAL", ONIG_SYN_ALLOW_INVALID_INTERVAL }, + { "ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV", ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV }, + { "ONIG_SYN_STRICT_CHECK_BACKREF", ONIG_SYN_STRICT_CHECK_BACKREF }, + { "ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND", ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND }, + { "ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP", ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP }, + { "ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME", ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME }, + { "ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY", ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY }, + { "ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC", ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC }, + { "ONIG_SYN_BACKSLASH_ESCAPE_IN_CC", ONIG_SYN_BACKSLASH_ESCAPE_IN_CC }, + { "ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC", ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC }, + { "ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC", ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC }, + { "ONIG_SYN_WARN_CC_OP_NOT_ESCAPED", ONIG_SYN_WARN_CC_OP_NOT_ESCAPED }, + { "ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT", ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT }, + { "ONIG_META_CHAR_ESCAPE", ONIG_META_CHAR_ESCAPE }, + { "ONIG_META_CHAR_ANYCHAR", ONIG_META_CHAR_ANYCHAR }, + { "ONIG_META_CHAR_ANYTIME", ONIG_META_CHAR_ANYTIME }, + { "ONIG_META_CHAR_ZERO_OR_ONE_TIME", ONIG_META_CHAR_ZERO_OR_ONE_TIME }, + { "ONIG_META_CHAR_ONE_OR_MORE_TIME", ONIG_META_CHAR_ONE_OR_MORE_TIME }, + { "ONIG_META_CHAR_ANYCHAR_ANYTIME", ONIG_META_CHAR_ANYCHAR_ANYTIME }, + { "ONIG_INEFFECTIVE_META_CHAR", ONIG_INEFFECTIVE_META_CHAR }, + { "ONIG_NORMAL", ONIG_NORMAL }, + { "ONIG_MISMATCH", ONIG_MISMATCH }, + { "ONIG_NO_SUPPORT_CONFIG", ONIG_NO_SUPPORT_CONFIG }, + { "ONIG_MAX_CAPTURE_HISTORY_GROUP", ONIG_MAX_CAPTURE_HISTORY_GROUP }, + { "ONIG_TRAVERSE_CALLBACK_AT_FIRST", ONIG_TRAVERSE_CALLBACK_AT_FIRST }, + { "ONIG_TRAVERSE_CALLBACK_AT_LAST", 
ONIG_TRAVERSE_CALLBACK_AT_LAST }, + { "ONIG_TRAVERSE_CALLBACK_AT_BOTH", ONIG_TRAVERSE_CALLBACK_AT_BOTH }, + { "ONIG_REGION_NOTPOS", ONIG_REGION_NOTPOS }, + { "ONIG_CHAR_TABLE_SIZE", ONIG_CHAR_TABLE_SIZE }, + { "ONIG_STATE_NORMAL", ONIG_STATE_NORMAL }, + { "ONIG_STATE_SEARCHING", ONIG_STATE_SEARCHING }, + { "ONIG_STATE_COMPILING", ONIG_STATE_COMPILING }, + { "ONIG_STATE_MODIFY", ONIG_STATE_MODIFY }, + { NULL, 0 } +}; + +static flag_pair onig_error_flags[] = { + { "ONIGERR_MEMORY", ONIGERR_MEMORY }, + { "ONIGERR_TYPE_BUG", ONIGERR_TYPE_BUG }, + { "ONIGERR_PARSER_BUG", ONIGERR_PARSER_BUG }, + { "ONIGERR_STACK_BUG", ONIGERR_STACK_BUG }, + { "ONIGERR_UNDEFINED_BYTECODE", ONIGERR_UNDEFINED_BYTECODE }, + { "ONIGERR_UNEXPECTED_BYTECODE", ONIGERR_UNEXPECTED_BYTECODE }, + { "ONIGERR_MATCH_STACK_LIMIT_OVER", ONIGERR_MATCH_STACK_LIMIT_OVER }, + { "ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED", ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED }, + { "ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR", ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR }, + { "ONIGERR_INVALID_ARGUMENT", ONIGERR_INVALID_ARGUMENT }, + { "ONIGERR_END_PATTERN_AT_LEFT_BRACE", ONIGERR_END_PATTERN_AT_LEFT_BRACE }, + { "ONIGERR_END_PATTERN_AT_LEFT_BRACKET", ONIGERR_END_PATTERN_AT_LEFT_BRACKET }, + { "ONIGERR_EMPTY_CHAR_CLASS", ONIGERR_EMPTY_CHAR_CLASS }, + { "ONIGERR_PREMATURE_END_OF_CHAR_CLASS", ONIGERR_PREMATURE_END_OF_CHAR_CLASS }, + { "ONIGERR_END_PATTERN_AT_ESCAPE", ONIGERR_END_PATTERN_AT_ESCAPE }, + { "ONIGERR_END_PATTERN_AT_META", ONIGERR_END_PATTERN_AT_META }, + { "ONIGERR_END_PATTERN_AT_CONTROL", ONIGERR_END_PATTERN_AT_CONTROL }, + { "ONIGERR_META_CODE_SYNTAX", ONIGERR_META_CODE_SYNTAX }, + { "ONIGERR_CONTROL_CODE_SYNTAX", ONIGERR_CONTROL_CODE_SYNTAX }, + { "ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE", ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE }, + { "ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE", ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE }, + { "ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS", 
ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS }, + { "ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED", ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED }, + { "ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID", ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID }, + { "ONIGERR_NESTED_REPEAT_OPERATOR", ONIGERR_NESTED_REPEAT_OPERATOR }, + { "ONIGERR_UNMATCHED_CLOSE_PARENTHESIS", ONIGERR_UNMATCHED_CLOSE_PARENTHESIS }, + { "ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS", ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS }, + { "ONIGERR_END_PATTERN_IN_GROUP", ONIGERR_END_PATTERN_IN_GROUP }, + { "ONIGERR_UNDEFINED_GROUP_OPTION", ONIGERR_UNDEFINED_GROUP_OPTION }, + { "ONIGERR_INVALID_POSIX_BRACKET_TYPE", ONIGERR_INVALID_POSIX_BRACKET_TYPE }, + { "ONIGERR_INVALID_LOOK_BEHIND_PATTERN", ONIGERR_INVALID_LOOK_BEHIND_PATTERN }, + { "ONIGERR_INVALID_REPEAT_RANGE_PATTERN", ONIGERR_INVALID_REPEAT_RANGE_PATTERN }, + { "ONIGERR_TOO_BIG_NUMBER", ONIGERR_TOO_BIG_NUMBER }, + { "ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE", ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE }, + { "ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE", ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE }, + { "ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS", ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS }, + { "ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE", ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE }, + { "ONIGERR_TOO_MANY_MULTI_BYTE_RANGES", ONIGERR_TOO_MANY_MULTI_BYTE_RANGES }, + { "ONIGERR_TOO_SHORT_MULTI_BYTE_STRING", ONIGERR_TOO_SHORT_MULTI_BYTE_STRING }, + { "ONIGERR_TOO_BIG_BACKREF_NUMBER", ONIGERR_TOO_BIG_BACKREF_NUMBER }, + { "ONIGERR_INVALID_BACKREF", ONIGERR_INVALID_BACKREF }, + { "ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED", ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED }, + { "ONIGERR_TOO_LONG_WIDE_CHAR_VALUE", ONIGERR_TOO_LONG_WIDE_CHAR_VALUE }, + { "ONIGERR_EMPTY_GROUP_NAME", ONIGERR_EMPTY_GROUP_NAME }, + { "ONIGERR_INVALID_GROUP_NAME", ONIGERR_INVALID_GROUP_NAME }, + { "ONIGERR_INVALID_CHAR_IN_GROUP_NAME", ONIGERR_INVALID_CHAR_IN_GROUP_NAME }, + 
{ "ONIGERR_UNDEFINED_NAME_REFERENCE", ONIGERR_UNDEFINED_NAME_REFERENCE }, + { "ONIGERR_UNDEFINED_GROUP_REFERENCE", ONIGERR_UNDEFINED_GROUP_REFERENCE }, + { "ONIGERR_MULTIPLEX_DEFINED_NAME", ONIGERR_MULTIPLEX_DEFINED_NAME }, + { "ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL", ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL }, + { "ONIGERR_NEVER_ENDING_RECURSION", ONIGERR_NEVER_ENDING_RECURSION }, + { "ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY", ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY }, + { "ONIGERR_INVALID_CHAR_PROPERTY_NAME", ONIGERR_INVALID_CHAR_PROPERTY_NAME }, + { "ONIGERR_INVALID_CODE_POINT_VALUE", ONIGERR_INVALID_CODE_POINT_VALUE }, + { "ONIGERR_INVALID_WIDE_CHAR_VALUE", ONIGERR_INVALID_WIDE_CHAR_VALUE }, + { "ONIGERR_TOO_BIG_WIDE_CHAR_VALUE", ONIGERR_TOO_BIG_WIDE_CHAR_VALUE }, + { "ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION", ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION }, + { "ONIGERR_INVALID_COMBINATION_OF_OPTIONS", ONIGERR_INVALID_COMBINATION_OF_OPTIONS }, + { "ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT", ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT }, + { NULL, 0 } +}; + +int LOnig_get_flags (lua_State *L) { + const flag_pair* fps[] = { onig_flags, onig_error_flags, NULL }; + return get_flags (L, fps); +} + diff --git a/src/oniguruma/rex_onig.mak b/src/oniguruma/rex_onig.mak new file mode 100755 index 0000000..60b7375 --- /dev/null +++ b/src/oniguruma/rex_onig.mak @@ -0,0 +1,38 @@ +# makefile for rex_onig library + +include ../defaults.mak + +# === USER SETTINGS === +# =========================================================================== + +# These are default values. +INC = +LIB = -lonig + +# If the default settings don't work for your system, +# try to uncomment and edit the settings below. 
+#INC = +#LIB = -lonig + +# Target name +TRG = rex_onig + +# =========================================================================== +# === END OF USER SETTINGS === + +OBJ = lonig.o lonig_f.o common.o + +include ../common.mak + +# static Oniguruma regexp library binding +ar_onig: $(TRG_AR) + +# dynamic Oniguruma regexp library binding +so_onig: $(TRG_SO) + +# Dependencies +lonig.o: lonig.c common.h algo.h +lonig_f.o: lonig_f.c common.h +common.o: common.c common.h + +# (End of Makefile) diff --git a/src/pcre/lpcre.c b/src/pcre/lpcre.c new file mode 100755 index 0000000..1f3ff50 --- /dev/null +++ b/src/pcre/lpcre.c @@ -0,0 +1,439 @@ +/* lpcre.c - Lua binding of PCRE library */ +/* See Copyright Notice in the file LICENSE */ + +#include <stdlib.h> +#include <string.h> +#include <locale.h> +#include <ctype.h> +#include <pcre.h> + +#include "lua.h" +#include "lauxlib.h" +#include "../common.h" + +extern int Lpcre_get_flags (lua_State *L); +extern int Lpcre_config (lua_State *L); +extern flag_pair pcre_error_flags[]; + +/* These 2 settings may be redefined from the command-line or the makefile. + * They should be kept in sync between themselves and with the target name. 
+ */ +#ifndef REX_LIBNAME +# define REX_LIBNAME "rex_pcre" +#endif +#ifndef REX_OPENLIB +# define REX_OPENLIB luaopen_rex_pcre +#endif + +#define REX_TYPENAME REX_LIBNAME"_regex" + +#define ALG_CFLAGS_DFLT 0 +#define ALG_EFLAGS_DFLT 0 + +static int getcflags (lua_State *L, int pos); +#define ALG_GETCFLAGS(L,pos) getcflags(L, pos) + +static void optlocale (TArgComp *argC, lua_State *L, int pos); +#define ALG_OPTLOCALE(a,b,c) optlocale(a,b,c) + +#define ALG_NOMATCH PCRE_ERROR_NOMATCH +#define ALG_ISMATCH(res) ((res) >= 0) +#define ALG_SUBBEG(ud,n) ud->match[n+n] +#define ALG_SUBEND(ud,n) ud->match[n+n+1] +#define ALG_SUBLEN(ud,n) (ALG_SUBEND(ud,n) - ALG_SUBBEG(ud,n)) +#define ALG_SUBVALID(ud,n) (ALG_SUBBEG(ud,n) >= 0) +#define ALG_NSUB(ud) ((int)ud->ncapt) + +#define ALG_PUSHSUB(L,ud,text,n) \ + lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n)) + +#define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \ + (ALG_SUBVALID(ud,n) ? ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) + +#define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1) +#define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n)) +#define ALG_PUSHOFFSETS(L,ud,offs,n) \ + (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n)) + +#define ALG_BASE(st) 0 +#define ALG_PULL +#define ALG_USERETRY + +typedef struct { + pcre * pr; + pcre_extra * extra; + int * match; + int ncapt; + const unsigned char * tables; + int freed; +} TPcre; + +#define TUserdata TPcre + +#if PCRE_MAJOR >= 4 +static void do_named_subpatterns (lua_State *L, TPcre *ud, const char *text); +# define DO_NAMED_SUBPATTERNS do_named_subpatterns +#endif + +#include "../algo.h" + +/* Locations of the 2 permanent tables in the function environment */ +#define INDEX_CHARTABLES_META 1 /* chartables type's metatable */ +#define INDEX_CHARTABLES_LINK 2 /* link chartables to compiled regex */ + +const char chartables_typename[] = "chartables"; + +/* Functions + 
****************************************************************************** + */ + +static void push_chartables_meta (lua_State *L) { + lua_pushinteger (L, INDEX_CHARTABLES_META); + lua_rawget (L, LUA_ENVIRONINDEX); +} + +static int getcflags (lua_State *L, int pos) { + switch (lua_type (L, pos)) { + case LUA_TNONE: + case LUA_TNIL: + return ALG_CFLAGS_DFLT; + case LUA_TNUMBER: + return lua_tointeger (L, pos); + case LUA_TSTRING: { + const char *s = lua_tostring (L, pos); + int res = 0, ch; + while ((ch = *s++) != '\0') { + if (ch == 'i') res |= PCRE_CASELESS; + else if (ch == 'm') res |= PCRE_MULTILINE; + else if (ch == 's') res |= PCRE_DOTALL; + else if (ch == 'x') res |= PCRE_EXTENDED; + else if (ch == 'U') res |= PCRE_UNGREEDY; + else if (ch == 'X') res |= PCRE_EXTRA; + } + return res; + } + default: + return luaL_typerror (L, pos, "number or string"); + } +} + +static int generate_error (lua_State *L, const TPcre *ud, int errcode) { + const char *key = get_flag_key (pcre_error_flags, errcode); + (void) ud; + if (key) + return luaL_error (L, "error PCRE_%s", key); + else + return luaL_error (L, "PCRE error code %d", errcode); +} + +#if PCRE_MAJOR >= 6 +/* method r:dfa_exec (s, [st], [ef], [ovecsize], [wscount]) */ +static void checkarg_dfa_exec (lua_State *L, TArgExec *argE, TPcre **ud) { + *ud = check_ud (L); + argE->text = luaL_checklstring (L, 2, &argE->textlen); + argE->startoffset = get_startoffset (L, 3, argE->textlen); + argE->eflags = luaL_optint (L, 4, ALG_EFLAGS_DFLT); + argE->ovecsize = luaL_optint (L, 5, 100); + argE->wscount = luaL_optint (L, 6, 50); +} +#endif + +static int Lpcre_maketables (lua_State *L) { + *(const void**)lua_newuserdata (L, sizeof(void*)) = pcre_maketables(); + push_chartables_meta (L); + lua_setmetatable (L, -2); + return 1; +} + +static void **check_chartables (lua_State *L, int pos) { + void **q; + /* Compare the metatable against the C function environment. 
*/ + if (lua_getmetatable(L, pos)) { + push_chartables_meta (L); + if (lua_rawequal(L, -1, -2) && + (q = (void **)lua_touserdata(L, pos)) != NULL) { + lua_pop(L, 2); + return q; + } + } + luaL_argerror(L, pos, lua_pushfstring (L, "not a %s", chartables_typename)); + return NULL; +} + +static int chartables_gc (lua_State *L) { + void **ud = check_chartables (L, 1); + if (*ud) { + pcre_free (*ud); + *ud = NULL; + } + return 0; +} + +static void optlocale (TArgComp *argC, lua_State *L, int pos) { + argC->locale = NULL; + argC->tables = NULL; + if (!lua_isnoneornil (L, pos)) { + if (lua_isstring (L, pos)) + argC->locale = lua_tostring (L, pos); + else { + argC->tablespos = pos; + argC->tables = *check_chartables (L, pos); + } + } +} + +static int compile_regex (lua_State *L, const TArgComp *argC, TPcre **pud) { + const char *error; + int erroffset; + TPcre *ud; + const unsigned char *tables = NULL; + + ud = (TPcre*)lua_newuserdata (L, sizeof (TPcre)); + memset (ud, 0, sizeof (TPcre)); /* initialize all members to 0 */ + lua_pushvalue (L, LUA_ENVIRONINDEX); + lua_setmetatable (L, -2); + + if (argC->locale) { + char old_locale[256]; + strcpy (old_locale, setlocale (LC_CTYPE, NULL)); /* store the locale */ + if (NULL == setlocale (LC_CTYPE, argC->locale)) /* set new locale */ + return luaL_error (L, "cannot set locale"); + ud->tables = tables = pcre_maketables (); /* make tables with new locale */ + setlocale (LC_CTYPE, old_locale); /* restore the old locale */ + } + else if (argC->tables) { + tables = argC->tables; + lua_pushinteger (L, INDEX_CHARTABLES_LINK); + lua_rawget (L, LUA_ENVIRONINDEX); + lua_pushvalue (L, -2); + lua_pushvalue (L, argC->tablespos); + lua_rawset (L, -3); + lua_pop (L, 1); + } + + ud->pr = pcre_compile (argC->pattern, argC->cflags, &error, &erroffset, tables); + if (!ud->pr) + return luaL_error (L, "%s (pattern offset: %d)", error, erroffset + 1); + + ud->extra = pcre_study (ud->pr, 0, &error); + if (error) return luaL_error (L, "%s", error); + + 
pcre_fullinfo (ud->pr, ud->extra, PCRE_INFO_CAPTURECOUNT, &ud->ncapt); + /* need (2 ints per capture, plus one for substring match) * 3/2 */ + ud->match = (int *) Lmalloc (L, (ALG_NSUB(ud) + 1) * 3 * sizeof (int)); + + if (pud) *pud = ud; + return 1; +} + +#if PCRE_MAJOR >= 4 +/* the target table must be on lua stack top */ +static void do_named_subpatterns (lua_State *L, TPcre *ud, const char *text) { + int i, namecount, name_entry_size; + unsigned char *name_table, *tabptr; + + /* do named subpatterns - NJG */ + pcre_fullinfo (ud->pr, ud->extra, PCRE_INFO_NAMECOUNT, &namecount); + if (namecount <= 0) + return; + pcre_fullinfo (ud->pr, ud->extra, PCRE_INFO_NAMETABLE, &name_table); + pcre_fullinfo (ud->pr, ud->extra, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size); + tabptr = name_table; + for (i = 0; i < namecount; i++) { + int n = (tabptr[0] << 8) | tabptr[1]; /* number of the capturing parenthesis */ + if (n > 0 && n <= ALG_NSUB(ud)) { /* check range */ + lua_pushstring (L, (char *)tabptr + 2); /* name of the capture, zero terminated */ + ALG_PUSHSUB_OR_FALSE (L, ud, text, n); + lua_rawset (L, -3); + } + tabptr += name_entry_size; + } +} +#endif /* #if PCRE_MAJOR >= 4 */ + +#if PCRE_MAJOR >= 6 +static int Lpcre_dfa_exec (lua_State *L) +{ + TArgExec argE; + TPcre *ud; + int res; + int *buf, *ovector, *wspace; + + checkarg_dfa_exec (L, &argE, &ud); + buf = (int*) Lmalloc (L, (argE.ovecsize + argE.wscount) * sizeof(int)); + ovector = buf; + wspace = buf + argE.ovecsize; + + res = pcre_dfa_exec (ud->pr, ud->extra, argE.text, (int)argE.textlen, + argE.startoffset, argE.eflags, ovector, argE.ovecsize, wspace, argE.wscount); + + if (ALG_ISMATCH (res) || res == PCRE_ERROR_PARTIAL) { + int i; + int max = (res>0) ? res : (res==0) ? 
(int)argE.ovecsize/2 : 1; + lua_pushinteger (L, ovector[0] + 1); /* 1-st return value */ + lua_newtable (L); /* 2-nd return value */ + for (i=0; i<max; i++) { + lua_pushinteger (L, ovector[i+i+1]); + lua_rawseti (L, -2, i+1); + } + lua_pushinteger (L, res); /* 3-rd return value */ + free (buf); + return 3; + } + else { + free (buf); + if (res == ALG_NOMATCH) + return lua_pushnil (L), 1; + else + return generate_error (L, ud, res); + } +} +#endif /* #if PCRE_MAJOR >= 6 */ + +#ifdef ALG_USERETRY + static int gmatch_exec (TUserdata *ud, TArgExec *argE, int retry) { + int eflags = retry ? (argE->eflags|PCRE_NOTEMPTY|PCRE_ANCHORED) : argE->eflags; + return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, + argE->startoffset, eflags, ud->match, (ALG_NSUB(ud) + 1) * 3); + } +#else + static int gmatch_exec (TUserdata *ud, TArgExec *argE) { + return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, + argE->startoffset, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3); + } +#endif + +static void gmatch_pushsubject (lua_State *L, TArgExec *argE) { + lua_pushlstring (L, argE->text, argE->textlen); +} + +static int findmatch_exec (TPcre *ud, TArgExec *argE) { + return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, + argE->startoffset, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3); +} + +#ifdef ALG_USERETRY + static int gsub_exec (TPcre *ud, TArgExec *argE, int st, int retry) { + int eflags = retry ? 
(argE->eflags|PCRE_NOTEMPTY|PCRE_ANCHORED) : argE->eflags; + return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, + st, eflags, ud->match, (ALG_NSUB(ud) + 1) * 3); + } +#else + static int gsub_exec (TPcre *ud, TArgExec *argE, int st) { + return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, + st, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3); + } +#endif + +static int split_exec (TPcre *ud, TArgExec *argE, int offset) { + return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, offset, + argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3); +} + +static int Lpcre_gc (lua_State *L) { + TPcre *ud = check_ud (L); + if (ud->freed == 0) { /* precaution against "manual" __gc calling */ + ud->freed = 1; + if (ud->pr) pcre_free (ud->pr); + if (ud->extra) pcre_free (ud->extra); + if (ud->tables) pcre_free ((void *)ud->tables); + if (ud->match) free (ud->match); + } + return 0; +} + +static int Lpcre_tostring (lua_State *L) { + TPcre *ud = check_ud (L); + if (ud->freed == 0) + lua_pushfstring (L, "%s (%p)", REX_TYPENAME, (void*)ud); + else + lua_pushfstring (L, "%s (deleted)", REX_TYPENAME); + return 1; +} + +static int chartables_tostring (lua_State *L) { + void **ud = check_chartables (L, 1); + lua_pushfstring (L, "%s (%p)", chartables_typename, ud); + return 1; +} + +static int Lpcre_version (lua_State *L) { + lua_pushstring (L, pcre_version ()); + return 1; +} + +static const luaL_reg chartables_meta[] = { + { "__gc", chartables_gc }, + { "__tostring", chartables_tostring }, + { NULL, NULL } +}; + +static const luaL_reg regex_meta[] = { + { "exec", ud_exec }, + { "tfind", ud_tfind }, /* old name: match */ + { "find", ud_find }, + { "match", ud_match }, +#if PCRE_MAJOR >= 6 + { "dfa_exec", Lpcre_dfa_exec }, +#endif + { "__gc", Lpcre_gc }, + { "__tostring", Lpcre_tostring }, + { NULL, NULL } +}; + +static const luaL_reg rexlib[] = { + { "match", match }, + { "find", find }, + { "gmatch", gmatch }, + { "gsub", gsub }, + { "split", split }, + { 
"new", ud_new }, + { "plainfind", plainfind_func }, + { "flags", Lpcre_get_flags }, + { "version", Lpcre_version }, + { "maketables", Lpcre_maketables }, +#if PCRE_MAJOR >= 4 + { "config", Lpcre_config }, +#endif + { NULL, NULL } +}; + +/* Open the library */ +REX_API int REX_OPENLIB (lua_State *L) { + if (PCRE_MAJOR > atoi (pcre_version ())) { + return luaL_error (L, "%s requires at least version %d of PCRE library", + REX_LIBNAME, (int)PCRE_MAJOR); + } + /* create a new function environment to serve as a metatable for methods */ + lua_newtable (L); + lua_pushvalue (L, -1); + lua_replace (L, LUA_ENVIRONINDEX); + lua_pushvalue(L, -1); /* mt.__index = mt */ + lua_setfield(L, -2, "__index"); + luaL_register (L, NULL, regex_meta); + + /* register functions */ + luaL_register (L, REX_LIBNAME, rexlib); + lua_pushliteral (L, REX_VERSION" (for PCRE)"); + lua_setfield (L, -2, "_VERSION"); + + /* create a table and register it as a metatable for "chartables" userdata */ + lua_pushinteger (L, INDEX_CHARTABLES_META); + lua_newtable (L); + lua_pushliteral (L, "access denied"); + lua_setfield (L, -2, "__metatable"); + luaL_register (L, NULL, chartables_meta); + lua_rawset (L, LUA_ENVIRONINDEX); + + /* create a table for connecting "chartables" userdata to "regex" userdata */ + lua_pushinteger (L, INDEX_CHARTABLES_LINK); + lua_newtable (L); + lua_pushliteral (L, "k"); /* weak keys */ + lua_setfield (L, -2, "__mode"); + lua_pushvalue (L, -1); /* setmetatable (tb, tb) */ + lua_setmetatable (L, -2); + lua_rawset (L, LUA_ENVIRONINDEX); + + return 1; +} + diff --git a/src/pcre/lpcre_f.c b/src/pcre/lpcre_f.c new file mode 100755 index 0000000..a6c9cf9 --- /dev/null +++ b/src/pcre/lpcre_f.c @@ -0,0 +1,189 @@ +/* lpcre.c - PCRE regular expression library */ +/* See Copyright Notice in the file LICENSE */ + +#include <pcre.h> +#include "lua.h" +#include "lauxlib.h" +#include "../common.h" + +#define VERSION_PCRE (PCRE_MAJOR*100 + PCRE_MINOR) + +static flag_pair pcre_flags[] = { + { 
"MAJOR", PCRE_MAJOR }, + { "MINOR", PCRE_MINOR }, +/*---------------------------------------------------------------------------*/ + { "CASELESS", PCRE_CASELESS }, + { "MULTILINE", PCRE_MULTILINE }, + { "DOTALL", PCRE_DOTALL }, + { "EXTENDED", PCRE_EXTENDED }, + { "ANCHORED", PCRE_ANCHORED }, + { "DOLLAR_ENDONLY", PCRE_DOLLAR_ENDONLY }, + { "EXTRA", PCRE_EXTRA }, + { "NOTBOL", PCRE_NOTBOL }, + { "NOTEOL", PCRE_NOTEOL }, + { "UNGREEDY", PCRE_UNGREEDY }, + { "NOTEMPTY", PCRE_NOTEMPTY }, + { "UTF8", PCRE_UTF8 }, +#if VERSION_PCRE >= 400 + { "NO_AUTO_CAPTURE", PCRE_NO_AUTO_CAPTURE }, + { "NO_UTF8_CHECK", PCRE_NO_UTF8_CHECK }, +#endif +#if VERSION_PCRE >= 500 + { "AUTO_CALLOUT", PCRE_AUTO_CALLOUT }, + { "PARTIAL", PCRE_PARTIAL }, +#endif +#if VERSION_PCRE >= 600 + { "DFA_SHORTEST", PCRE_DFA_SHORTEST }, + { "DFA_RESTART", PCRE_DFA_RESTART }, + { "FIRSTLINE", PCRE_FIRSTLINE }, +#endif +#if VERSION_PCRE >= 607 + { "DUPNAMES", PCRE_DUPNAMES }, + { "NEWLINE_CR", PCRE_NEWLINE_CR }, + { "NEWLINE_LF", PCRE_NEWLINE_LF }, + { "NEWLINE_CRLF", PCRE_NEWLINE_CRLF }, +#endif +#if VERSION_PCRE >= 700 + { "NEWLINE_ANY", PCRE_NEWLINE_ANY }, +#endif +#if VERSION_PCRE >= 701 + { "NEWLINE_ANYCRLF", PCRE_NEWLINE_ANYCRLF }, +#endif +#if VERSION_PCRE >= 704 + { "BSR_ANYCRLF", PCRE_BSR_ANYCRLF }, + { "BSR_UNICODE", PCRE_BSR_UNICODE }, +#endif +#if VERSION_PCRE >= 707 + { "JAVASCRIPT_COMPAT", PCRE_JAVASCRIPT_COMPAT }, +#endif +/*---------------------------------------------------------------------------*/ + { "INFO_OPTIONS", PCRE_INFO_OPTIONS }, + { "INFO_SIZE", PCRE_INFO_SIZE }, + { "INFO_CAPTURECOUNT", PCRE_INFO_CAPTURECOUNT }, + { "INFO_BACKREFMAX", PCRE_INFO_BACKREFMAX }, +#if VERSION_PCRE >= 400 + { "INFO_FIRSTBYTE", PCRE_INFO_FIRSTBYTE }, +#endif + { "INFO_FIRSTCHAR", PCRE_INFO_FIRSTCHAR }, + { "INFO_FIRSTTABLE", PCRE_INFO_FIRSTTABLE }, + { "INFO_LASTLITERAL", PCRE_INFO_LASTLITERAL }, +#if VERSION_PCRE >= 400 + { "INFO_NAMEENTRYSIZE", PCRE_INFO_NAMEENTRYSIZE }, + { "INFO_NAMECOUNT", 
PCRE_INFO_NAMECOUNT }, + { "INFO_NAMETABLE", PCRE_INFO_NAMETABLE }, + { "INFO_STUDYSIZE", PCRE_INFO_STUDYSIZE }, +#endif +#if VERSION_PCRE >= 500 + { "INFO_DEFAULT_TABLES", PCRE_INFO_DEFAULT_TABLES }, +#endif +#ifdef PCRE_INFO_OKPARTIAL + { "INFO_OKPARTIAL", PCRE_INFO_OKPARTIAL }, +#endif +#ifdef PCRE_INFO_JCHANGED + { "INFO_JCHANGED", PCRE_INFO_JCHANGED }, +#endif +#ifdef PCRE_INFO_HASCRORLF + { "INFO_HASCRORLF", PCRE_INFO_HASCRORLF }, +#endif +/*---------------------------------------------------------------------------*/ +#if VERSION_PCRE >= 400 + { "EXTRA_STUDY_DATA", PCRE_EXTRA_STUDY_DATA }, + { "EXTRA_MATCH_LIMIT", PCRE_EXTRA_MATCH_LIMIT }, + { "EXTRA_CALLOUT_DATA", PCRE_EXTRA_CALLOUT_DATA }, +#endif +#if VERSION_PCRE >= 500 + { "EXTRA_TABLES", PCRE_EXTRA_TABLES }, +#endif +#ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION + { "EXTRA_MATCH_LIMIT_RECURSION", PCRE_EXTRA_MATCH_LIMIT_RECURSION }, +#endif +/*---------------------------------------------------------------------------*/ + { NULL, 0 } +}; + +flag_pair pcre_error_flags[] = { + { "ERROR_NOMATCH", PCRE_ERROR_NOMATCH }, + { "ERROR_NULL", PCRE_ERROR_NULL }, + { "ERROR_BADOPTION", PCRE_ERROR_BADOPTION }, + { "ERROR_BADMAGIC", PCRE_ERROR_BADMAGIC }, +#if VERSION_PCRE >= 700 + { "ERROR_UNKNOWN_OPCODE", PCRE_ERROR_UNKNOWN_OPCODE }, +#endif + { "ERROR_UNKNOWN_NODE", PCRE_ERROR_UNKNOWN_NODE }, + { "ERROR_NOMEMORY", PCRE_ERROR_NOMEMORY }, + { "ERROR_NOSUBSTRING", PCRE_ERROR_NOSUBSTRING }, +#if VERSION_PCRE >= 400 + { "ERROR_MATCHLIMIT", PCRE_ERROR_MATCHLIMIT }, + { "ERROR_CALLOUT", PCRE_ERROR_CALLOUT }, + { "ERROR_BADUTF8", PCRE_ERROR_BADUTF8 }, + { "ERROR_BADUTF8_OFFSET", PCRE_ERROR_BADUTF8_OFFSET }, +#endif +#if VERSION_PCRE >= 500 + { "ERROR_PARTIAL", PCRE_ERROR_PARTIAL }, + { "ERROR_BADPARTIAL", PCRE_ERROR_BADPARTIAL }, + { "ERROR_INTERNAL", PCRE_ERROR_INTERNAL }, + { "ERROR_BADCOUNT", PCRE_ERROR_BADCOUNT }, +#endif +#if VERSION_PCRE >= 600 + { "ERROR_DFA_UITEM", PCRE_ERROR_DFA_UITEM }, + { "ERROR_DFA_UCOND", 
PCRE_ERROR_DFA_UCOND }, + { "ERROR_DFA_UMLIMIT", PCRE_ERROR_DFA_UMLIMIT }, + { "ERROR_DFA_WSSIZE", PCRE_ERROR_DFA_WSSIZE }, + { "ERROR_DFA_RECURSE", PCRE_ERROR_DFA_RECURSE }, +#endif +#if VERSION_PCRE >= 607 + { "ERROR_RECURSIONLIMIT", PCRE_ERROR_RECURSIONLIMIT }, +#endif +#if VERSION_PCRE >= 700 + { "ERROR_BADNEWLINE", PCRE_ERROR_BADNEWLINE }, +#endif +#ifdef PCRE_ERROR_NULLWSLIMIT + { "ERROR_NULLWSLIMIT", PCRE_ERROR_NULLWSLIMIT }, +#endif +/*---------------------------------------------------------------------------*/ + { NULL, 0 } +}; + +#if VERSION_PCRE >= 400 +static flag_pair pcre_config_flags[] = { + { "CONFIG_UTF8", PCRE_CONFIG_UTF8 }, + { "CONFIG_NEWLINE", PCRE_CONFIG_NEWLINE }, + { "CONFIG_LINK_SIZE", PCRE_CONFIG_LINK_SIZE }, + { "CONFIG_POSIX_MALLOC_THRESHOLD", PCRE_CONFIG_POSIX_MALLOC_THRESHOLD }, + { "CONFIG_MATCH_LIMIT", PCRE_CONFIG_MATCH_LIMIT }, + { "CONFIG_STACKRECURSE", PCRE_CONFIG_STACKRECURSE }, +#if VERSION_PCRE >= 500 + { "CONFIG_UNICODE_PROPERTIES", PCRE_CONFIG_UNICODE_PROPERTIES }, +#endif +#if VERSION_PCRE >= 650 + { "CONFIG_MATCH_LIMIT_RECURSION", PCRE_CONFIG_MATCH_LIMIT_RECURSION }, +#endif +#if VERSION_PCRE >= 704 + { "CONFIG_BSR", PCRE_CONFIG_BSR }, +#endif +/*---------------------------------------------------------------------------*/ + { NULL, 0 } +}; + +int Lpcre_config (lua_State *L) { + int val; + flag_pair *fp; + if (lua_istable (L, 1)) + lua_settop (L, 1); + else + lua_newtable (L); + for (fp = pcre_config_flags; fp->key; ++fp) { + if (0 == pcre_config (fp->val, &val)) { + lua_pushinteger (L, val); + lua_setfield (L, -2, fp->key); + } + } + return 1; +} +#endif /* #if VERSION_PCRE >= 400 */ + +int Lpcre_get_flags (lua_State *L) { + const flag_pair* fps[] = { pcre_flags, pcre_error_flags, NULL }; + return get_flags (L, fps); +} + diff --git a/src/pcre/rex_pcre.mak b/src/pcre/rex_pcre.mak new file mode 100755 index 0000000..04ebff9 --- /dev/null +++ b/src/pcre/rex_pcre.mak @@ -0,0 +1,38 @@ +# makefile for rex_pcre library + 
+include ../defaults.mak + +# === USER SETTINGS === +# =========================================================================== + +# These are default values. +INC = +LIB = -lpcre + +# If the default settings don't work for your system, +# try to uncomment and edit the settings below. +#INC = +#LIB = -lpcre + +# Target name +TRG = rex_pcre + +# =========================================================================== +# === END OF USER SETTINGS === + +OBJ = lpcre.o lpcre_f.o common.o + +include ../common.mak + +# static PCRE regexp library binding +ar_pcre: $(TRG_AR) + +# dynamic PCRE regexp library binding +so_pcre: $(TRG_SO) + +# Dependencies +lpcre.o: lpcre.c common.h algo.h +lpcre_f.o: lpcre_f.c common.h +common.o: common.c common.h + +# (End of Makefile) diff --git a/src/posix/lposix.c b/src/posix/lposix.c new file mode 100755 index 0000000..bf07299 --- /dev/null +++ b/src/posix/lposix.c @@ -0,0 +1,295 @@ +/* lposix.c - Lua binding of POSIX regular expressions library */ +/* See Copyright Notice in the file LICENSE */ + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include "lua.h" +#include "lauxlib.h" +#include "../common.h" + +#ifndef REX_POSIX_INCLUDE +# include <regex.h> +#else +# include REX_POSIX_INCLUDE +#endif + +/* These 2 settings may be redefined from the command-line or the makefile. + * They should be kept in sync between themselves and with the target name. + */ +#ifndef REX_LIBNAME +# define REX_LIBNAME "rex_posix" +#endif +#ifndef REX_OPENLIB +# define REX_OPENLIB luaopen_rex_posix +#endif + +#define REX_TYPENAME REX_LIBNAME"_regex" + +/* Test if regex.h corresponds to the extended POSIX library, i.e. H.Spencer's. + This test may not work as intended if regex.h introduced REG_BASIC, etc. + via enum rather than #define. + If that's the case, add -DREX_POSIX_EXT in the makefile/command line. 
+*/
+#ifndef REX_POSIX_EXT
+#  if defined(REG_BASIC) && defined(REG_STARTEND)
+#    define REX_POSIX_EXT
+#  endif
+#endif
+
+#define ALG_CFLAGS_DFLT REG_EXTENDED
+#ifdef REX_POSIX_EXT
+#  define ALG_EFLAGS_DFLT REG_STARTEND
+#else
+#  define ALG_EFLAGS_DFLT 0
+#endif
+
+#define ALG_NOMATCH        REG_NOMATCH
+#define ALG_ISMATCH(res)   ((res) == 0)
+#define ALG_SUBBEG(ud,n)   ud->match[n].rm_so
+#define ALG_SUBEND(ud,n)   ud->match[n].rm_eo
+#define ALG_SUBLEN(ud,n)   (ALG_SUBEND(ud,n) - ALG_SUBBEG(ud,n))
+#define ALG_SUBVALID(ud,n) (ALG_SUBBEG(ud,n) >= 0)
+#ifdef REX_NSUB_BASE1
+#  define ALG_NSUB(ud)     ((int)ud->r.re_nsub - 1)
+#else
+#  define ALG_NSUB(ud)     ((int)ud->r.re_nsub)
+#endif
+
+#define ALG_PUSHSUB(L,ud,text,n) \
+  lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n))
+
+#define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \
+  (ALG_SUBVALID(ud,n) ? ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0))
+
+#define ALG_PUSHSTART(L,ud,offs,n)   lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1)
+#define ALG_PUSHEND(L,ud,offs,n)     lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n))
+#define ALG_PUSHOFFSETS(L,ud,offs,n) \
+  (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n))
+
+#define ALG_BASE(st)                  (st)
+#define ALG_GETCFLAGS(L,pos)          luaL_optint(L, pos, ALG_CFLAGS_DFLT)
+
+typedef struct {
+  regex_t      r;
+  regmatch_t * match;
+  int          freed;
+} TPosix;
+
+#define TUserdata TPosix
+
+#include "../algo.h"
+
+/*  Functions
+ ******************************************************************************
+ */
+
+/* Raises a Lua error carrying the regerror() text for `errcode`. */
+static int generate_error (lua_State *L, const TPosix *ud, int errcode) {
+  char errbuf[80];
+  regerror (errcode, &ud->r, errbuf, sizeof (errbuf));
+  return luaL_error (L, "%s", errbuf);
+}
+
+/* Creates a new regex userdata (left on the Lua stack), compiles
+ * argC->pattern with regcomp and allocates the match array.
+ * Stores the userdata pointer in *pud when pud is not NULL and returns 1;
+ * a regcomp failure is reported through generate_error. */
+static int compile_regex (lua_State *L, const TArgComp *argC, TPosix **pud) {
+  int res;
+  TPosix *ud;
+
+  ud = (TPosix *)lua_newuserdata (L, sizeof (TPosix));
+  memset (ud, 0, sizeof (TPosix));          /* initialize all members to 0 */
+
+#ifdef REX_POSIX_EXT
+  if (argC->cflags & REG_PEND)
+    ud->r.re_endp = argC->pattern + argC->patlen;
+#endif
+
+  res = regcomp (&ud->r, argC->pattern, argC->cflags);
+  if (res != 0)
+    return generate_error (L, ud, res);
+
+  /* Attach the metatable as soon as the regex is compiled, i.e. before the
+   * allocation below: should that allocation raise an error (Lmalloc takes L
+   * and its result is used unchecked - presumably it raises; verify in
+   * common.c), __gc can still regfree the compiled pattern.  Posix_gc copes
+   * with ud->match being NULL. */
+  lua_pushvalue (L, LUA_ENVIRONINDEX);
+  lua_setmetatable (L, -2);
+
+  if (argC->cflags & REG_NOSUB)
+    ud->r.re_nsub = 0;
+  ud->match = (regmatch_t *) Lmalloc (L, (ALG_NSUB(ud) + 1) * sizeof (regmatch_t));
+
+  if (pud) *pud = ud;
+  return 1;
+}
+
+#ifdef REX_POSIX_EXT
+/* With REG_STARTEND the search range is passed in match[0] and the start
+ * offset is reset to 0; otherwise the text pointer is advanced instead. */
+static void CheckStartEnd (TArgExec *argE, TPosix *ud) {
+  if (argE->eflags & REG_STARTEND) {
+    ud->match[0].rm_so = argE->startoffset;
+    ud->match[0].rm_eo = argE->textlen;
+    argE->startoffset = 0;
+  }
+  else
+    argE->text += argE->startoffset;
+}
+#endif
+
+static int gmatch_exec (TUserdata *ud, TArgExec *argE) {
+  if (argE->startoffset > 0)
+    argE->eflags |= REG_NOTBOL;
+
+#ifdef REX_POSIX_EXT
+  if (argE->eflags & REG_STARTEND) {
+    ALG_SUBBEG(ud,0) = 0;
+    ALG_SUBEND(ud,0) = argE->textlen - argE->startoffset;
+  }
+#endif
+
+  argE->text += argE->startoffset;
+  return regexec (&ud->r, argE->text, ALG_NSUB(ud) + 1, ud->match, argE->eflags);
+}
+
+static void gmatch_pushsubject (lua_State *L, TArgExec *argE) {
+#ifdef REX_POSIX_EXT
+  if (argE->eflags & REG_STARTEND)
+    lua_pushlstring (L, argE->text, argE->textlen);
+  else
+    lua_pushlstring (L, argE->text, strlen (argE->text));
+#else
+    lua_pushlstring (L, argE->text, strlen (argE->text));
+#endif
+}
+
+static int findmatch_exec (TPosix *ud, TArgExec *argE) {
+#ifdef REX_POSIX_EXT
+  CheckStartEnd (argE, ud);
+#else
+  argE->text += argE->startoffset;
+#endif
+  return regexec (&ud->r, argE->text, ALG_NSUB(ud) + 1, ud->match, argE->eflags);
+}
+
+static int gsub_exec (TPosix *ud, TArgExec *argE, int st) {
+#ifdef REX_POSIX_EXT
+  if(argE->eflags & REG_STARTEND) {
+    ALG_SUBBEG(ud,0) = 0;
+    ALG_SUBEND(ud,0) = argE->textlen - st;
+  }
+#endif
+  if (st > 0)
+    argE->eflags |= REG_NOTBOL;
+  return regexec (&ud->r, argE->text+st, ALG_NSUB(ud)+1, ud->match, argE->eflags);
+}
+
+static int split_exec (TPosix *ud, TArgExec
*argE, int offset) { +#ifdef REX_POSIX_EXT + if (argE->eflags & REG_STARTEND) { + ALG_SUBBEG(ud,0) = 0; + ALG_SUBEND(ud,0) = argE->textlen - offset; + } +#endif + if (offset > 0) + argE->eflags |= REG_NOTBOL; + + return regexec (&ud->r, argE->text + offset, ALG_NSUB(ud) + 1, ud->match, argE->eflags); +} + +static int Posix_gc (lua_State *L) { + TPosix *ud = check_ud (L); + if (ud->freed == 0) { /* precaution against "manual" __gc calling */ + ud->freed = 1; + regfree (&ud->r); + if (ud->match) + free (ud->match); + } + return 0; +} + +static int Posix_tostring (lua_State *L) { + TPosix *ud = check_ud (L); + if (ud->freed == 0) + lua_pushfstring (L, "%s (%p)", REX_TYPENAME, (void*)ud); + else + lua_pushfstring (L, "%s (deleted)", REX_TYPENAME); + return 1; +} + +static flag_pair posix_flags[] = +{ +#ifdef REX_POSIX_EXT + { "BASIC", REG_BASIC }, + { "NOSPEC", REG_NOSPEC }, + { "PEND", REG_PEND }, + { "STARTEND", REG_STARTEND }, +#endif + { "EXTENDED", REG_EXTENDED }, + { "ICASE", REG_ICASE }, + { "NOSUB", REG_NOSUB }, + { "NEWLINE", REG_NEWLINE }, + { "NOTBOL", REG_NOTBOL }, + { "NOTEOL", REG_NOTEOL }, +/*---------------------------------------------------------------------------*/ + { NULL, 0 } +}; + +static flag_pair posix_error_flags[] = { + { "NOMATCH", REG_NOMATCH }, + { "BADPAT", REG_BADPAT }, + { "ECOLLATE", REG_ECOLLATE }, + { "ECTYPE", REG_ECTYPE }, + { "EESCAPE", REG_EESCAPE }, + { "ESUBREG", REG_ESUBREG }, + { "EBRACK", REG_EBRACK }, + { "EPAREN", REG_EPAREN }, + { "EBRACE", REG_EBRACE }, + { "BADBR", REG_BADBR }, + { "ERANGE", REG_ERANGE }, + { "ESPACE", REG_ESPACE }, + { "BADRPT", REG_BADRPT }, +#ifdef REX_POSIX_EXT + { "EMPTY", REG_EMPTY }, + { "ASSERT", REG_ASSERT }, + { "INVARG", REG_INVARG }, +#endif +/*---------------------------------------------------------------------------*/ + { NULL, 0 } +}; + +static int Posix_get_flags (lua_State *L) { + const flag_pair* fps[] = { posix_flags, posix_error_flags, NULL }; + return get_flags (L, fps); +} + 
+static const luaL_reg posixmeta[] = { + { "exec", ud_exec }, + { "tfind", ud_tfind }, /* old match */ + { "find", ud_find }, + { "match", ud_match }, + { "__gc", Posix_gc }, + { "__tostring", Posix_tostring }, + { NULL, NULL} +}; + +static const luaL_reg rexlib[] = { + { "match", match }, + { "find", find }, + { "gmatch", gmatch }, + { "gsub", gsub }, + { "split", split }, + { "new", ud_new }, + { "flags", Posix_get_flags }, + { "plainfind", plainfind_func }, + { NULL, NULL } +}; + +/* Open the library */ +REX_API int REX_OPENLIB (lua_State *L) +{ + /* create a new function environment to serve as a metatable for methods */ + lua_newtable (L); + lua_pushvalue (L, -1); + lua_replace (L, LUA_ENVIRONINDEX); + lua_pushvalue(L, -1); /* mt.__index = mt */ + lua_setfield(L, -2, "__index"); + luaL_register (L, NULL, posixmeta); + + /* register functions */ + luaL_register (L, REX_LIBNAME, rexlib); + lua_pushliteral (L, REX_VERSION" (for POSIX regexes)"); + lua_setfield (L, -2, "_VERSION"); + return 1; +} diff --git a/src/posix/rex_posix.mak b/src/posix/rex_posix.mak new file mode 100755 index 0000000..15d20b6 --- /dev/null +++ b/src/posix/rex_posix.mak @@ -0,0 +1,48 @@ +# makefile for rex_posix library + +include ../defaults.mak + +# === USER SETTINGS === +# =========================================================================== + +# These are default values. +INC = +LIB = + +# If the default settings don't work for your system, +# try to uncomment and edit the settings below. +#INC = +#LIB = -lc + +# WARNING: +# If you want to use a POSIX regex library that is not the system +# default, make sure you set both the INC and LIB variables correctly, +# as if a header file and library are used which do not match, you may +# well get segmentation faults (or worse). + +# The following lines work for the rxspencer library, when installed +# under /usr (note the above warning!) 
+#INC = -I/usr/include/rxspencer +#LIB = -lrxspencer + +# Target name +TRG = rex_posix + +# =========================================================================== +# === END OF USER SETTINGS === + +OBJ = lposix.o common.o + +include ../common.mak + +# static POSIX regexp library binding +ar_posix: $(TRG_AR) + +# dynamic POSIX regexp library binding +so_posix: $(TRG_SO) + +# Dependencies +lposix.o: lposix.c common.h algo.h +common.o: common.c common.h + +# (End of Makefile) diff --git a/src/tre/ltre.c b/src/tre/ltre.c new file mode 100755 index 0000000..d2acce7 --- /dev/null +++ b/src/tre/ltre.c @@ -0,0 +1,358 @@ +/* ltre.c - Lua binding of TRE regular expressions library */ +/* See Copyright Notice in the file LICENSE */ + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include "lua.h" +#include "lauxlib.h" +#include "../common.h" + +#include <tre/regex.h> + +/* These 2 settings may be redefined from the command-line or the makefile. + * They should be kept in sync between themselves and with the target name. + */ +#ifndef REX_LIBNAME +# define REX_LIBNAME "rex_tre" +#endif +#ifndef REX_OPENLIB +# define REX_OPENLIB luaopen_rex_tre +#endif + +#define REX_TYPENAME REX_LIBNAME"_regex" + +#define ALG_CFLAGS_DFLT REG_EXTENDED +#define ALG_EFLAGS_DFLT 0 + +#define ALG_NOMATCH REG_NOMATCH +#define ALG_ISMATCH(res) ((res) == 0) +#define ALG_SUBBEG(ud,n) ud->match[n].rm_so +#define ALG_SUBEND(ud,n) ud->match[n].rm_eo +#define ALG_SUBLEN(ud,n) (ALG_SUBEND(ud,n) - ALG_SUBBEG(ud,n)) +#define ALG_SUBVALID(ud,n) (ALG_SUBBEG(ud,n) >= 0) +#define ALG_NSUB(ud) ((int)ud->r.re_nsub) + +#define ALG_PUSHSUB(L,ud,text,n) \ + lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n)) + +#define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \ + (ALG_SUBVALID(ud,n) ? 
ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) + +#define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1) +#define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n)) +#define ALG_PUSHOFFSETS(L,ud,offs,n) \ + (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n)) + +#define ALG_BASE(st) (st) +#define ALG_GETCFLAGS(L,pos) luaL_optint(L, pos, ALG_CFLAGS_DFLT) + +typedef struct { + regex_t r; + regmatch_t * match; + int freed; +} TPosix; + +#define TUserdata TPosix + +#include "../algo.h" + +/* Functions + ****************************************************************************** + */ + +static void checkarg_regaparams (lua_State *L, int stackpos, regaparams_t *argP) { + if (lua_type (L, stackpos) != LUA_TTABLE) /* allow for userdata? */ + luaL_argerror (L, stackpos, "table expected"); + lua_pushvalue (L, stackpos); + argP->cost_ins = get_int_field (L, "cost_ins"); + argP->cost_del = get_int_field (L, "cost_del"); + argP->cost_subst = get_int_field (L, "cost_subst"); + argP->max_cost = get_int_field (L, "max_cost"); + argP->max_ins = get_int_field (L, "max_ins"); + argP->max_del = get_int_field (L, "max_del"); + argP->max_subst = get_int_field (L, "max_subst"); + argP->max_err = get_int_field (L, "max_err"); + lua_pop (L, 1); +} + +/* method r:atfind (s, params, [st], [ef]) */ +/* method r:aexec (s, params, [st], [ef]) */ +static void checkarg_atfind (lua_State *L, TArgExec *argE, TPosix **ud, + regaparams_t *argP) { + *ud = check_ud (L); + argE->text = luaL_checklstring (L, 2, &argE->textlen); + checkarg_regaparams (L, 3, argP); + argE->startoffset = get_startoffset (L, 4, argE->textlen); + argE->eflags = luaL_optint (L, 5, ALG_EFLAGS_DFLT); +} + +static int generate_error (lua_State *L, const TPosix *ud, int errcode) { + char errbuf[80]; + regerror (errcode, &ud->r, errbuf, sizeof (errbuf)); + return luaL_error (L, "%s", errbuf); +} + +static int compile_regex (lua_State *L, const TArgComp *argC, TPosix **pud) 
{ + int res; + TPosix *ud; + + ud = (TPosix *)lua_newuserdata (L, sizeof (TPosix)); + memset (ud, 0, sizeof (TPosix)); /* initialize all members to 0 */ + + res = regncomp (&ud->r, argC->pattern, argC->patlen, argC->cflags); + if (res != 0) + return generate_error (L, ud, res); + + if (argC->cflags & REG_NOSUB) + ud->r.re_nsub = 0; + ud->match = (regmatch_t *) Lmalloc (L, (ALG_NSUB(ud) + 1) * sizeof (regmatch_t)); + lua_pushvalue (L, LUA_ENVIRONINDEX); + lua_setmetatable (L, -2); + + if (pud) *pud = ud; + return 1; +} + +static int generic_atfind (lua_State *L, int tfind) { + int res; + TArgExec argE; + TPosix *ud; + regaparams_t argP; + regamatch_t res_match; + + checkarg_atfind (L, &argE, &ud, &argP); + if (argE.startoffset > (int)argE.textlen) + return lua_pushnil(L), 1; + + argE.text += argE.startoffset; + res_match.nmatch = ALG_NSUB(ud) + 1; + res_match.pmatch = ud->match; + + /* execute the search */ + res = reganexec (&ud->r, argE.text, argE.textlen - argE.startoffset, + &res_match, argP, argE.eflags); + if (ALG_ISMATCH (res)) { + ALG_PUSHOFFSETS (L, ud, argE.startoffset, 0); + if (tfind) + push_substring_table (L, ud, argE.text); + else + push_offset_table (L, ud, argE.startoffset); + /* set values in the dictionary part of the table */ + set_int_field (L, "cost", res_match.cost); + set_int_field (L, "num_ins", res_match.num_ins); + set_int_field (L, "num_del", res_match.num_del); + set_int_field (L, "num_subst", res_match.num_subst); + return 3; + } + else if (res == ALG_NOMATCH) + return lua_pushnil (L), 1; + else + return generate_error (L, ud, res); +} + +static int Ltre_atfind (lua_State *L) { + return generic_atfind (L, 1); +} + +static int Ltre_aexec (lua_State *L) { + return generic_atfind (L, 0); +} + +static int gmatch_exec (TUserdata *ud, TArgExec *argE) { + if (argE->startoffset > 0) + argE->eflags |= REG_NOTBOL; + argE->text += argE->startoffset; + return regnexec (&ud->r, argE->text, argE->textlen - argE->startoffset, + ALG_NSUB(ud) + 1, 
ud->match, argE->eflags); +} + +static void gmatch_pushsubject (lua_State *L, TArgExec *argE) { + lua_pushlstring (L, argE->text, argE->textlen); +} + +static int findmatch_exec (TPosix *ud, TArgExec *argE) { + argE->text += argE->startoffset; + return regnexec (&ud->r, argE->text, argE->textlen - argE->startoffset, + ALG_NSUB(ud) + 1, ud->match, argE->eflags); +} + +static int gsub_exec (TPosix *ud, TArgExec *argE, int st) { + if (st > 0) + argE->eflags |= REG_NOTBOL; + return regnexec (&ud->r, argE->text+st, argE->textlen-st, ALG_NSUB(ud)+1, + ud->match, argE->eflags); +} + +static int split_exec (TPosix *ud, TArgExec *argE, int offset) { + if (offset > 0) + argE->eflags |= REG_NOTBOL; + return regnexec (&ud->r, argE->text + offset, argE->textlen - offset, + ALG_NSUB(ud) + 1, ud->match, argE->eflags); +} + +static int Ltre_have_backrefs (lua_State *L) { + TPosix *ud = check_ud (L); + lua_pushboolean (L, tre_have_backrefs (&ud->r)); + return 1; +} + +static int Ltre_have_approx (lua_State *L) { + TPosix *ud = check_ud (L); + lua_pushboolean (L, tre_have_approx (&ud->r)); + return 1; +} + +static int Ltre_gc (lua_State *L) { + TPosix *ud = check_ud (L); + if (ud->freed == 0) { /* precaution against "manual" __gc calling */ + ud->freed = 1; + regfree (&ud->r); + if (ud->match) + free (ud->match); + } + return 0; +} + +static int Ltre_tostring (lua_State *L) { + TPosix *ud = check_ud (L); + if (ud->freed == 0) + lua_pushfstring (L, "%s (%p)", REX_TYPENAME, (void*)ud); + else + lua_pushfstring (L, "%s (deleted)", REX_TYPENAME); + return 1; +} + +static flag_pair tre_flags[] = +{ + { "BASIC", REG_BASIC }, + { "NOSPEC", REG_NOSPEC }, + { "EXTENDED", REG_EXTENDED }, + { "ICASE", REG_ICASE }, + { "NOSUB", REG_NOSUB }, + { "NEWLINE", REG_NEWLINE }, + { "NOTBOL", REG_NOTBOL }, + { "NOTEOL", REG_NOTEOL }, + /* TRE-specific flags */ + { "LITERAL", REG_LITERAL }, + { "RIGHT_ASSOC", REG_RIGHT_ASSOC }, + { "UNGREEDY", REG_UNGREEDY }, + { "APPROX_MATCHER", REG_APPROX_MATCHER }, + 
{ "BACKTRACKING_MATCHER", REG_BACKTRACKING_MATCHER }, +/*---------------------------------------------------------------------------*/ + { NULL, 0 } +}; + +static flag_pair tre_error_flags[] = { + { "OK", REG_OK }, /* TRE-specific */ + { "NOMATCH", REG_NOMATCH }, + { "BADPAT", REG_BADPAT }, + { "ECOLLATE", REG_ECOLLATE }, + { "ECTYPE", REG_ECTYPE }, + { "EESCAPE", REG_EESCAPE }, + { "ESUBREG", REG_ESUBREG }, + { "EBRACK", REG_EBRACK }, + { "EPAREN", REG_EPAREN }, + { "EBRACE", REG_EBRACE }, + { "BADBR", REG_BADBR }, + { "ERANGE", REG_ERANGE }, + { "ESPACE", REG_ESPACE }, + { "BADRPT", REG_BADRPT }, +/*---------------------------------------------------------------------------*/ + { NULL, 0 } +}; + +/* config. flags with integer value */ +static flag_pair tre_config_flags_int[] = { + { "CONFIG_APPROX", TRE_CONFIG_APPROX }, + { "CONFIG_WCHAR", TRE_CONFIG_WCHAR }, + { "CONFIG_MULTIBYTE", TRE_CONFIG_MULTIBYTE }, + { "CONFIG_SYSTEM_ABI", TRE_CONFIG_SYSTEM_ABI }, + { NULL, 0 } +}; + +/* config. 
flags with string value */ +static flag_pair tre_config_flags_str[] = { + { "CONFIG_VERSION", TRE_CONFIG_VERSION }, + { NULL, 0 } +}; + +static int Ltre_get_flags (lua_State *L) { + const flag_pair* fps[] = { tre_flags, tre_error_flags, NULL }; + return get_flags (L, fps); +} + +static int Ltre_config (lua_State *L) { + int intval; + const char *strval; + flag_pair *fp; + if (lua_istable (L, 1)) + lua_settop (L, 1); + else + lua_newtable (L); + for (fp = tre_config_flags_int; fp->key; ++fp) { + if (0 == tre_config (fp->val, &intval)) { + lua_pushinteger (L, intval); + lua_setfield (L, -2, fp->key); + } + } + for (fp = tre_config_flags_str; fp->key; ++fp) { + if (0 == tre_config (fp->val, &strval)) { + lua_pushstring (L, strval); + lua_setfield (L, -2, fp->key); + } + } + return 1; +} + +static int Ltre_version (lua_State *L) { + lua_pushstring (L, tre_version ()); + return 1; +} + +static const luaL_reg posixmeta[] = { + { "exec", ud_exec }, + { "tfind", ud_tfind }, + { "find", ud_find }, + { "match", ud_match }, + { "aexec", Ltre_aexec }, + { "atfind", Ltre_atfind }, + { "have_backrefs", Ltre_have_backrefs }, + { "have_approx", Ltre_have_approx }, + { "__gc", Ltre_gc }, + { "__tostring", Ltre_tostring }, + { NULL, NULL} +}; + +static const luaL_reg rexlib[] = { + { "match", match }, + { "find", find }, + { "gmatch", gmatch }, + { "gsub", gsub }, + { "split", split }, + { "new", ud_new }, + { "flags", Ltre_get_flags }, + { "plainfind", plainfind_func }, + { "config", Ltre_config }, + { "version", Ltre_version }, + { NULL, NULL } +}; + +/* Open the library */ +REX_API int REX_OPENLIB (lua_State *L) +{ + /* create a new function environment to serve as a metatable for methods */ + lua_newtable (L); + lua_pushvalue (L, -1); + lua_replace (L, LUA_ENVIRONINDEX); + lua_pushvalue(L, -1); /* mt.__index = mt */ + lua_setfield(L, -2, "__index"); + luaL_register (L, NULL, posixmeta); + + /* register functions */ + luaL_register (L, REX_LIBNAME, rexlib); + lua_pushliteral (L, 
REX_VERSION" (for TRE regexes)"); + lua_setfield (L, -2, "_VERSION"); + return 1; +} + diff --git a/src/tre/rex_tre.mak b/src/tre/rex_tre.mak new file mode 100755 index 0000000..dd4f98d --- /dev/null +++ b/src/tre/rex_tre.mak @@ -0,0 +1,42 @@ +# makefile for rex_tre library + +include ../defaults.mak + +# === USER SETTINGS === +# =========================================================================== + +# These are default values. +INC = +LIB = -ltre + +# If the default settings don't work for your system, +# try to uncomment and edit the settings below. +#INC = -I/usr/include +#LIB = -lc + +# WARNING: +# Make sure you set both the INC and LIB variables correctly, as +# otherwise you risk using a header file and library that do not +# match, and you may well get segmentation faults (or worse). + +# Target name +TRG = rex_tre + +# =========================================================================== +# === END OF USER SETTINGS === + +OBJ = ltre.o common.o + +include ../common.mak + +# static TRE regexp library binding +ar_tre: $(TRG_AR) + +# dynamic TRE regexp library binding +so_tre: $(TRG_SO) + +# Dependencies +ltre.o: ltre.c common.h algo.h +common.o: common.c common.h + +# (End of Makefile) |