summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog25
-rw-r--r--awk.h15
-rw-r--r--awkgram.c23
-rw-r--r--awkgram.y23
-rw-r--r--builtin.c26
-rw-r--r--debug.c13
-rw-r--r--eval.c7
-rw-r--r--interpret.h5
-rw-r--r--profile.c28
-rw-r--r--re.c9
10 files changed, 100 insertions, 74 deletions
diff --git a/ChangeLog b/ChangeLog
index 986b8561..d1de219d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,28 @@
+2016-11-15 Arnold D. Robbins <arnold@skeeve.com>
+
+ Start reworking typed regexes.
+
+ * awk.h (Node_typedregex): Nuked.
+ [REGEX]: New flag.
+ (tre_regs): New macro for new preg member in val part of NODE union.
+ (force_string, force_number, fixtype): Remove use of Node_typedregex.
+ * awkgram.y (grammar): Use REGEX flag instead of node type.
+ (valinfo): Ditto.
+ (make_regnode): Adjust creation based on node type.
+ * builtin.c (do_length, do_print, call_sub, call_match,
+ call_split_func, do_typeof): Adjust code.
+ * debug.c (watchpoint_triggered, initialize_watch_item,
+ print_memory): Adjust code.
+ * eval.c (nodetypes): Remove Node_typedregex.
+ (flags2str): Add REGEX.
+ (setup_frame): Adjust code after removal of Node_typedregex.
+ * interpret.h (r_interpret): Adjust code after removal
+ of Node_typedregex.
+ * profile.c (pp_typed_regex): Renamed from pp_strong_regex.
+ (pp_string_or_typed_regex): Renamed from pp_string_or_strong_regex.
+ (pprint): Adjust code after removal of Node_typedregex.
+ * re.c (re_update): Adjust code after removal of Node_typedregex.
+
2016-11-04 Eli Zaretskii <eliz@gnu.org>
* builtin.c (efwrite) [__MINGW32__]: Call w32_maybe_set_errno if
diff --git a/awk.h b/awk.h
index df901190..cc487c3a 100644
--- a/awk.h
+++ b/awk.h
@@ -267,7 +267,6 @@ typedef enum nodevals {
Node_val, /* node is a value - type in flags */
Node_regex, /* a regexp, text, compiled, flags, etc */
Node_dynregex, /* a dynamic regexp */
- Node_typedregex, /* like Node_regex, but is a real type */
/* symbol table values */
Node_var, /* scalar variable, lnode is value */
@@ -385,6 +384,7 @@ typedef struct exp_node {
int idx;
wchar_t *wsp;
size_t wslen;
+ Regexp *preg;
} val;
} sub;
NODETYPE type;
@@ -461,6 +461,7 @@ typedef struct exp_node {
* See cint_array.c */
# define XARRAY 0x20000
# define NUMCONSTSTR 0x40000 /* have string value for numeric constant */
+# define REGEX 0x80000 /* this is a typed regex */
} NODE;
#define vname sub.nodep.name
@@ -508,6 +509,7 @@ typedef struct exp_node {
#else
#define numbr sub.val.fltnum
#endif
+#define tre_regs sub.val.preg
/*
* If stfmt is set to STFMT_UNUSED, it means that the string representation
@@ -1818,9 +1820,6 @@ dupnode(NODE *n)
static inline NODE *
force_string(NODE *s)
{
- if (s->type == Node_typedregex)
- return dupnode(s->re_exp);
-
if ((s->flags & STRCUR) != 0
&& (s->stfmt == STFMT_UNUSED || s->stfmt == CONVFMTidx)
)
@@ -1847,9 +1846,6 @@ unref(NODE *r)
static inline NODE *
force_number(NODE *n)
{
- if (n->type == Node_typedregex)
- return Nnull_string;
-
return (n->flags & NUMCUR) != 0 ? n : str2number(n);
}
@@ -1866,15 +1862,12 @@ force_number(NODE *n)
* It is safe to assume that the return value will be the same NODE,
* since force_number on a MAYBE_NUM should always return the same NODE,
* and force_string on an INTIND should as well.
- *
- * There is no way to handle a Node_typedregex correctly, so we ignore
- * that case.
*/
static inline NODE *
fixtype(NODE *n)
{
- assert(n->type == Node_val || n->type == Node_typedregex);
+ assert(n->type == Node_val);
if (n->type == Node_val) {
if ((n->flags & MAYBE_NUM) != 0)
return force_number(n);
diff --git a/awkgram.c b/awkgram.c
index fed91b39..4bf6b95e 100644
--- a/awkgram.c
+++ b/awkgram.c
@@ -2283,7 +2283,7 @@ yyreduce:
len = strlen(re);
exp = make_str_node(re, len, ALREADY_MALLOCED);
- n = make_regnode(Node_typedregex, exp);
+ n = make_regnode(Node_val, exp);
if (n == NULL) {
unref(exp);
YYABORT;
@@ -3154,7 +3154,7 @@ regular_print:
case 80:
#line 1278 "awkgram.y" /* yacc.c:1646 */
{
- assert((yyvsp[0])->memory->type == Node_typedregex);
+ assert(((yyvsp[0])->memory->flags & REGEX) == REGEX);
(yyvsp[0])->opcode = Op_push_re;
(yyval) = (yyvsp[0]);
}
@@ -3481,7 +3481,7 @@ regular_print:
_("regular expression on left of `~' or `!~' operator"));
assert((yyvsp[0])->opcode == Op_push_re
- && (yyvsp[0])->memory->type == Node_typedregex);
+ && ((yyvsp[0])->memory->flags & REGEX) != 0);
/* RHS is @/.../ */
(yyvsp[-1])->memory = (yyvsp[0])->memory;
bcfree((yyvsp[0]));
@@ -5836,6 +5836,7 @@ yylex(void)
lexeme = lexptr;
thisline = NULL;
+
collect_regexp:
if (want_regexp) {
int in_brack = 0; /* count brackets, [[:alnum:]] allowed */
@@ -7051,8 +7052,8 @@ valinfo(NODE *n, Func_print print_func, FILE *fp)
{
if (n == Nnull_string)
print_func(fp, "uninitialized scalar\n");
- else if (n->type == Node_typedregex)
- print_func(fp, "@/%.*s/\n", n->re_exp->stlen, n->re_exp->stptr);
+ else if ((n->flags & REGEX) != 0)
+ print_func(fp, "@/%.*s/\n", n->stlen, n->stptr);
else if ((n->flags & STRING) != 0) {
pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', false);
print_func(fp, "\n");
@@ -7425,9 +7426,9 @@ make_regnode(int type, NODE *exp)
getnode(n);
memset(n, 0, sizeof(NODE));
n->type = type;
- n->re_cnt = 1;
- if (type == Node_regex || type == Node_typedregex) {
+ if (type == Node_regex) {
+ n->re_cnt = 1;
n->re_reg = make_regexp(exp->stptr, exp->stlen, false, true, false);
if (n->re_reg == NULL) {
freenode(n);
@@ -7436,6 +7437,14 @@ make_regnode(int type, NODE *exp)
n->re_exp = exp;
n->re_flags = CONSTANT;
n->valref = 1;
+ } else if (type == Node_val) {
+ exp->tre_regs = make_regexp(exp->stptr, exp->stlen, false, true, false);
+ exp->flags |= REGEX|MALLOC|STRCUR|NUMCUR;
+ exp->numbr = 0;
+ exp->flags &= ~(STRING|NUMBER);
+ exp->valref = 1;
+ unref(n);
+ n = exp;
}
return n;
}
diff --git a/awkgram.y b/awkgram.y
index 7fc1f279..4df22a64 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -525,7 +525,7 @@ typed_regexp
len = strlen(re);
exp = make_str_node(re, len, ALREADY_MALLOCED);
- n = make_regnode(Node_typedregex, exp);
+ n = make_regnode(Node_val, exp);
if (n == NULL) {
unref(exp);
YYABORT;
@@ -1276,7 +1276,7 @@ case_value
}
| typed_regexp
{
- assert($1->memory->type == Node_typedregex);
+ assert(($1->memory->flags & REGEX) == REGEX);
$1->opcode = Op_push_re;
$$ = $1;
}
@@ -1491,7 +1491,7 @@ exp
_("regular expression on left of `~' or `!~' operator"));
assert($3->opcode == Op_push_re
- && $3->memory->type == Node_typedregex);
+ && ($3->memory->flags & REGEX) != 0);
/* RHS is @/.../ */
$2->memory = $3->memory;
bcfree($3);
@@ -3416,6 +3416,7 @@ yylex(void)
lexeme = lexptr;
thisline = NULL;
+
collect_regexp:
if (want_regexp) {
int in_brack = 0; /* count brackets, [[:alnum:]] allowed */
@@ -4631,8 +4632,8 @@ valinfo(NODE *n, Func_print print_func, FILE *fp)
{
if (n == Nnull_string)
print_func(fp, "uninitialized scalar\n");
- else if (n->type == Node_typedregex)
- print_func(fp, "@/%.*s/\n", n->re_exp->stlen, n->re_exp->stptr);
+ else if ((n->flags & REGEX) != 0)
+ print_func(fp, "@/%.*s/\n", n->stlen, n->stptr);
else if ((n->flags & STRING) != 0) {
pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', false);
print_func(fp, "\n");
@@ -5005,9 +5006,9 @@ make_regnode(int type, NODE *exp)
getnode(n);
memset(n, 0, sizeof(NODE));
n->type = type;
- n->re_cnt = 1;
- if (type == Node_regex || type == Node_typedregex) {
+ if (type == Node_regex) {
+ n->re_cnt = 1;
n->re_reg = make_regexp(exp->stptr, exp->stlen, false, true, false);
if (n->re_reg == NULL) {
freenode(n);
@@ -5016,6 +5017,14 @@ make_regnode(int type, NODE *exp)
n->re_exp = exp;
n->re_flags = CONSTANT;
n->valref = 1;
+ } else if (type == Node_val) {
+ exp->tre_regs = make_regexp(exp->stptr, exp->stlen, false, true, false);
+ exp->flags |= REGEX|MALLOC|STRCUR|NUMCUR;
+ exp->numbr = 0;
+ exp->flags &= ~(STRING|NUMBER);
+ exp->valref = 1;
+ unref(n);
+ n = exp;
}
return n;
}
diff --git a/builtin.c b/builtin.c
index a7fe6312..001dd6e2 100644
--- a/builtin.c
+++ b/builtin.c
@@ -536,7 +536,7 @@ do_length(int nargs)
return make_number(size);
}
- assert(tmp->type == Node_val || tmp->type == Node_typedregex);
+ assert(tmp->type == Node_val);
if (do_lint && (fixtype(tmp)->flags & STRING) == 0)
lintwarn(_("length: received non-string argument"));
@@ -2195,11 +2195,9 @@ do_print(int nargs, int redirtype)
fatal(_("attempt to use array `%s' in a scalar context"), array_vname(tmp));
}
- if (tmp->type == Node_typedregex)
- args_array[i] = force_string(tmp);
- else if ( (tmp->flags & STRCUR) == 0
- || ( tmp->stfmt != STFMT_UNUSED
- && tmp->stfmt != OFMTidx))
+ if ( (tmp->flags & STRCUR) == 0
+ || ( tmp->stfmt != STFMT_UNUSED
+ && tmp->stfmt != OFMTidx))
args_array[i] = format_val(OFMT, OFMTidx, tmp);
}
@@ -3203,7 +3201,7 @@ call_sub(const char *name, int nargs)
* push replace
* push $0
*/
- if (regex->type != Node_typedregex)
+ if ((regex->flags & REGEX) == 0)
regex = make_regnode(Node_regex, regex);
PUSH(regex);
PUSH(replace);
@@ -3228,7 +3226,7 @@ call_sub(const char *name, int nargs)
* nargs++
* }
*/
- if (regex->type != Node_typedregex)
+ if ((regex->flags & REGEX) == 0)
regex = make_regnode(Node_regex, regex);
PUSH(regex);
PUSH(replace);
@@ -3266,7 +3264,7 @@ call_match(int nargs)
/* Don't need to pop the string just to push it back ... */
- if (regex->type != Node_typedregex)
+ if ((regex->flags & REGEX) == 0)
regex = make_regnode(Node_regex, regex);
PUSH(regex);
@@ -3295,7 +3293,7 @@ call_split_func(const char *name, int nargs)
if (nargs >= 3) {
regex = POP_STRING();
- if (regex->type != Node_typedregex)
+ if ((regex->flags & REGEX) == 0)
regex = make_regnode(Node_regex, regex);
} else {
if (name[0] == 's') {
@@ -3955,12 +3953,9 @@ do_typeof(int nargs)
res = "array";
deref = false;
break;
- case Node_typedregex:
- res = "regexp";
- break;
case Node_val:
case Node_var:
- switch (arg->flags & (STRING|NUMBER|MAYBE_NUM)) {
+ switch (arg->flags & (STRING|NUMBER|MAYBE_NUM|REGEX)) {
case STRING:
res = "string";
break;
@@ -3970,6 +3965,9 @@ do_typeof(int nargs)
case STRING|MAYBE_NUM:
res = "strnum";
break;
+ case REGEX:
+ res = "regexp";
+ break;
case NUMBER|STRING:
if (arg == Nnull_string) {
res = "unassigned";
diff --git a/debug.c b/debug.c
index 70ba82e2..6d591cb5 100644
--- a/debug.c
+++ b/debug.c
@@ -1736,8 +1736,6 @@ watchpoint_triggered(struct list_item *w)
/* new != NULL */
if (t2->type == Node_val)
w->cur_value = dupnode(t2);
- else if (t2->type == Node_typedregex)
- w->cur_value = dupnode(t2);
else {
w->flags |= CUR_IS_ARRAY;
w->cur_size = (t2->type == Node_var_array) ? assoc_length(t2) : 0;
@@ -1750,7 +1748,6 @@ watchpoint_triggered(struct list_item *w)
w->flags |= CUR_IS_ARRAY;
w->cur_size = assoc_length(t2);
} else
- /* works for Node_typedregex too */
w->cur_value = dupnode(t2);
}
@@ -1793,7 +1790,7 @@ initialize_watch_item(struct list_item *w)
} else if (symbol->type == Node_var_array) {
w->flags |= CUR_IS_ARRAY;
w->cur_size = assoc_length(symbol);
- } else if (symbol->type == Node_typedregex) {
+ } else if (symbol->type == Node_val && (symbol->flags & REGEX) != 0) {
w->cur_value = dupnode(symbol);
} /* else
can't happen */
@@ -3708,14 +3705,14 @@ print_memory(NODE *m, NODE *func, Func_print print_func, FILE *fp)
print_func(fp, "%g", m->numbr);
} else if ((m->flags & STRING) != 0)
pp_string_fp(print_func, fp, m->stptr, m->stlen, '"', false);
- else
+ else if ((m->flags & REGEX) != 0) {
+ print_func(fp, "@");
+ pp_string_fp(print_func, fp, m->stptr, m->stlen, '/', false);
+ } else
print_func(fp, "-?-");
print_func(fp, " [%s]", flags2str(m->flags));
break;
- case Node_typedregex:
- print_func(fp, "@");
- /* fall through */
case Node_regex:
pp_string_fp(print_func, fp, m->re_exp->stptr, m->re_exp->stlen, '/', false);
break;
diff --git a/eval.c b/eval.c
index 00da9ee5..813a306c 100644
--- a/eval.c
+++ b/eval.c
@@ -236,7 +236,6 @@ static const char *const nodetypes[] = {
"Node_val",
"Node_regex",
"Node_dynregex",
- "Node_typedregex",
"Node_var",
"Node_var_array",
"Node_var_new",
@@ -451,6 +450,7 @@ flags2str(int flagval)
{ HALFHAT, "HALFHAT" },
{ XARRAY, "XARRAY" },
{ NUMCONSTSTR, "NUMCONSTSTR" },
+ { REGEX, "REGEX" },
{ 0, NULL },
};
@@ -1330,11 +1330,6 @@ setup_frame(INSTRUCTION *pc)
r->var_value = m;
break;
- case Node_typedregex:
- r->type = Node_var;
- r->var_value = m;
- break;
-
default:
cant_happen();
}
diff --git a/interpret.h b/interpret.h
index 9d3c1087..9b737617 100644
--- a/interpret.h
+++ b/interpret.h
@@ -268,7 +268,7 @@ uninitialized_scalar:
r = r->var_value;
}
- if (r->type == Node_val || r->type == Node_typedregex)
+ if (r->type == Node_val)
UPREF(r);
PUSH(r);
break;
@@ -991,7 +991,8 @@ arrayfor:
r = POP_STRING();
unref(m->re_exp);
m->re_exp = r;
- } else if (m->type == Node_typedregex) {
+ } else if (m->type == Node_val) {
+ assert((m->flags & REGEX) != 0);
UPREF(m);
}
PUSH(m);
diff --git a/profile.c b/profile.c
index c60fd296..f27f9dda 100644
--- a/profile.c
+++ b/profile.c
@@ -32,8 +32,8 @@ static void parenthesize(int type, NODE *left, NODE *right);
static char *pp_list(int nargs, const char *paren, const char *delim);
static char *pp_group3(const char *s1, const char *s2, const char *s3);
static char *pp_concat(int nargs);
-static char *pp_string_or_strong_regex(const char *in_str, size_t len, int delim, bool strong_regex);
-static char *pp_strong_regex(const char *in_str, size_t len, int delim);
+static char *pp_string_or_typed_regex(const char *in_str, size_t len, int delim, bool typed_regex);
+static char *pp_typed_regex(const char *in_str, size_t len, int delim);
static bool is_binary(int type);
static bool is_scalar(int type);
static int prec_level(int type);
@@ -640,7 +640,7 @@ cleanup:
break;
case Op_push_re:
- if (pc->memory->type != Node_regex && pc->memory->type != Node_typedregex)
+ if (pc->memory->type != Node_regex && (pc->memory->flags & REGEX) == 0)
break;
/* else
fall through */
@@ -650,7 +650,7 @@ cleanup:
if (pc->memory->type == Node_regex)
str = pp_string(re->stptr, re->stlen, '/');
else
- str = pp_strong_regex(re->stptr, re->stlen, '/');
+ str = pp_typed_regex(re->stptr, re->stlen, '/');
pp_push(pc->opcode, str, CAN_FREE);
}
break;
@@ -672,9 +672,9 @@ cleanup:
txt = t2->pp_str;
str = pp_group3(txt, op2str(pc->opcode), restr);
pp_free(t2);
- } else if (m->type == Node_typedregex) {
+ } else if (m->type == Node_val && (m->flags & REGEX) != 0) {
NODE *re = m->re_exp;
- restr = pp_strong_regex(re->stptr, re->stlen, '/');
+ restr = pp_typed_regex(re->stptr, re->stlen, '/');
str = pp_group3(txt, op2str(pc->opcode), restr);
efree(restr);
} else {
@@ -1416,21 +1416,21 @@ parenthesize(int type, NODE *left, NODE *right)
char *
pp_string(const char *in_str, size_t len, int delim)
{
- return pp_string_or_strong_regex(in_str, len, delim, false);
+ return pp_string_or_typed_regex(in_str, len, delim, false);
}
-/* pp_strong_regex --- pretty format a hard regex constant */
+/* pp_typed_regex --- pretty format a typed regex constant */
static char *
-pp_strong_regex(const char *in_str, size_t len, int delim)
+pp_typed_regex(const char *in_str, size_t len, int delim)
{
- return pp_string_or_strong_regex(in_str, len, delim, true);
+ return pp_string_or_typed_regex(in_str, len, delim, true);
}
-/* pp_string_or_strong_regex --- pretty format a string, regex, or hard regex constant */
+/* pp_string_or_typed_regex --- pretty format a string, regex, or typed regex constant */
char *
-pp_string_or_strong_regex(const char *in_str, size_t len, int delim, bool strong_regex)
+pp_string_or_typed_regex(const char *in_str, size_t len, int delim, bool typed_regex)
{
static char str_escapes[] = "\a\b\f\n\r\t\v\\";
static char str_printables[] = "abfnrtv\\";
@@ -1464,12 +1464,12 @@ pp_string_or_strong_regex(const char *in_str, size_t len, int delim, bool strong
} ofre -= (l)
/* initial size; 3 for delim + terminating null, 1 for @ */
- osiz = len + 3 + 1 + (strong_regex == true);
+ osiz = len + 3 + 1 + (typed_regex == true);
emalloc(obuf, char *, osiz, "pp_string");
obufout = obuf;
ofre = osiz - 1;
- if (strong_regex)
+ if (typed_regex)
*obufout++ = '@';
*obufout++ = delim;
diff --git a/re.c b/re.c
index 4360d307..e4191a1c 100644
--- a/re.c
+++ b/re.c
@@ -345,18 +345,17 @@ re_update(NODE *t)
{
NODE *t1;
+ if (t->type == Node_val && (t->flags & REGEX) != 0)
+ return t->tre_regs;
+
if ((t->re_flags & CASE) == IGNORECASE) {
/* regex was compiled with settings matching IGNORECASE */
if ((t->re_flags & CONSTANT) != 0) {
/* it's a constant, so just return it as is */
- assert(t->type == Node_regex || t->type == Node_typedregex);
+ assert(t->type == Node_regex);
return t->re_reg;
}
t1 = t->re_exp;
- if (t1->type == Node_typedregex) {
- assert((t1->re_flags & CONSTANT) != 0);
- return t1->re_reg;
- }
if (t->re_text != NULL) {
/* if contents haven't changed, just return it */
if (cmp_nodes(t->re_text, t1, true) == 0)