summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Arnold D. Robbins <arnold@skeeve.com> 2011-07-15 15:35:34 +0300
committer: Arnold D. Robbins <arnold@skeeve.com> 2011-07-15 15:35:34 +0300
commit: 46f2db24d9e7f792f60149f5ee89ef4f22e3f4a9 (patch)
tree: 4522982b751d8643ee05022e60bdac757dfd0956
parent: 84658669a180b3f1e63d20b6ea166f7c5733786b (diff)
download: gawk-46f2db24d9e7f792f60149f5ee89ef4f22e3f4a9.tar.gz
Fix gsub losing white space when working on fields.
-rw-r--r-- ChangeLog 18
-rw-r--r-- awk.h 15
-rw-r--r-- awkgram.c 153
-rw-r--r-- awkgram.y 153
-rw-r--r-- builtin.c 207
-rw-r--r-- debug.c 32
-rw-r--r-- eval.c 36
-rw-r--r-- profile.c 14
-rw-r--r-- test/ChangeLog 5
-rw-r--r-- test/Makefile.am 4
-rw-r--r-- test/Makefile.in 9
-rw-r--r-- test/Maketests 5
12 files changed, 388 insertions, 263 deletions
diff --git a/ChangeLog b/ChangeLog
index 0f01099a..2acb0567 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+2011-07-15 John Haque <j.eh@mchsi.com>
+
+ * awk.h (Op_sub_builtin): New opcode.
+ (GSUB, GENSUB, AFTER_ASSIGN, LITERAL): New flags for
+ Op_sub_builtin.
+ * awkgram.y (struct tokentab): Change opcode to Op_sub_builtin
+ for sub, gsub and gensub.
+ (snode): Update processing of sub, gsub and gensub.
+ * builtin.c (do_sub, do_gsub, do_gensub): Nuke.
+ (sub_common): Renamed to do_sub. Relocate gensub argument
+ handling code from do_gensub to here; Simplify the code a
+ little bit.
+ * eval.c (r_interpret): Handle Op_sub_builtin. Avoid field
+ re-splitting or $0 rebuilding if (g)sub target string is
+ a field and no substitutions were done.
+ * pprint (profile.c): Add case for the new opcode.
+ * print_instruction (debug.c): Ditto.
+
2011-07-15 Arnold D. Robbins <arnold@skeeve.com>
* awk.h: Typo fix: "loner" --> longer. Thanks to Nelson Beebe.
diff --git a/awk.h b/awk.h
index 2d13a213..d9da3b98 100644
--- a/awk.h
+++ b/awk.h
@@ -521,6 +521,7 @@ typedef enum opcodeval {
Op_K_nextfile,
Op_builtin,
+ Op_sub_builtin, /* sub, gsub and gensub */
Op_in_array, /* boolean test of membership in array */
/* function call instruction */
@@ -626,6 +627,16 @@ typedef struct exp_instruction {
#define target_jmp d.di
#define target_break x.xi
+/* Op_sub_builtin */
+#define sub_flags d.dl
+#define GSUB 0x01 /* builtin is gsub */
+#define GENSUB 0x02 /* builtin is gensub */
+#define AFTER_ASSIGN 0x04 /* (g)sub target is a field or a special var with
+ * set_XX routine.
+ */
+#define LITERAL 0x08 /* target is a literal string */
+
+
/* Op_K_exit */
#define target_end d.di
#define target_atexit x.xi
@@ -1181,9 +1192,7 @@ extern NODE *do_cos(int nargs);
extern NODE *do_rand(int nargs);
extern NODE *do_srand(int nargs);
extern NODE *do_match(int nargs);
-extern NODE *do_gsub(int nargs);
-extern NODE *do_sub(int nargs);
-extern NODE *do_gensub(int nargs);
+extern NODE *do_sub(int nargs, unsigned int flags, int *num_matches);
extern NODE *format_tree(const char *, size_t, NODE **, long);
extern NODE *do_lshift(int nargs);
extern NODE *do_rshift(int nargs);
diff --git a/awkgram.c b/awkgram.c
index 4edec572..876821d5 100644
--- a/awkgram.c
+++ b/awkgram.c
@@ -4485,6 +4485,7 @@ struct token {
# define RESX 0x0800 /* Bell Labs Research extension */
# define BREAK 0x1000 /* break allowed inside */
# define CONTINUE 0x2000 /* continue allowed inside */
+
NODE *(*ptr)(int); /* function that implements this keyword */
};
@@ -4542,9 +4543,9 @@ static const struct token tokentab[] = {
{"for", Op_K_for, LEX_FOR, BREAK|CONTINUE, 0},
{"func", Op_func, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
{"function",Op_func, LEX_FUNCTION, NOT_OLD, 0},
-{"gensub", Op_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub},
+{"gensub", Op_sub_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), 0},
{"getline", Op_K_getline_redir, LEX_GETLINE, NOT_OLD, 0},
-{"gsub", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
+{"gsub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0},
{"if", Op_K_if, LEX_IF, 0, 0},
{"in", Op_symbol, LEX_IN, 0, 0},
{"include", Op_symbol, LEX_INCLUDE, GAWKX, 0},
@@ -4575,7 +4576,7 @@ static const struct token tokentab[] = {
#endif
{"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime},
{"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum},
-{"sub", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
+{"sub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0},
{"substr", Op_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
{"switch", Op_K_switch, LEX_SWITCH, GAWKX|BREAK, 0},
{"system", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
@@ -6286,8 +6287,6 @@ snode(INSTRUCTION *subn, INSTRUCTION *r)
assert(nexp > 0);
}
- r->builtin = tokentab[idx].ptr;
-
/* check against how many args. are allowed for this builtin */
args_allowed = tokentab[idx].flags & ARGS;
if (args_allowed && (args_allowed & A(nexp)) == 0) {
@@ -6296,7 +6295,86 @@ snode(INSTRUCTION *subn, INSTRUCTION *r)
return NULL;
}
+ /* special processing for sub, gsub and gensub */
+
+ if (tokentab[idx].value == Op_sub_builtin) {
+ const char *operator = tokentab[idx].operator;
+
+ r->sub_flags = 0;
+
+ arg = subn->nexti; /* first arg list */
+ (void) mk_rexp(arg);
+
+ if (strcmp(operator, "gensub") != 0) {
+ /* sub and gsub */
+
+ if (strcmp(operator, "gsub") == 0)
+ r->sub_flags |= GSUB;
+
+ arg = arg->lasti->nexti; /* 2nd arg list */
+ if (nexp == 2) {
+ INSTRUCTION *expr;
+
+ expr = list_create(instruction(Op_push_i));
+ expr->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
+ (void) mk_expression_list(subn,
+ list_append(expr, instruction(Op_field_spec)));
+ }
+
+ arg = arg->lasti->nexti; /* third arg list */
+ ip = arg->lasti;
+ if (ip->opcode == Op_push_i) {
+ if (do_lint)
+ lintwarn(_("%s: string literal as last arg of substitute has no effect"),
+ operator);
+ r->sub_flags |= LITERAL;
+ } else {
+ if (make_assignable(ip) == NULL)
+ yyerror(_("%s third parameter is not a changeable object"),
+ operator);
+ else
+ ip->do_reference = TRUE;
+ }
+
+ r->expr_count = count_expressions(&subn, FALSE);
+ ip = subn->lasti;
+
+ (void) list_append(subn, r);
+
+ /* add after_assign code */
+ if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) {
+ (void) list_append(subn, instruction(Op_var_assign));
+ subn->lasti->memory = ip->memory;
+ subn->lasti->assign_var = ip->memory->var_assign;
+ r->sub_flags |= AFTER_ASSIGN;
+ } else if (ip->opcode == Op_field_spec_lhs) {
+ (void) list_append(subn, instruction(Op_field_assign));
+ subn->lasti->field_assign = (Func_ptr) 0;
+ ip->target_assign = subn->lasti;
+ r->sub_flags |= AFTER_ASSIGN;
+ }
+ return subn;
+
+ } else {
+ /* gensub */
+
+ r->sub_flags |= GENSUB;
+ if (nexp == 3) {
+ ip = instruction(Op_push_i);
+ ip->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
+ (void) mk_expression_list(subn,
+ list_append(list_create(ip), instruction(Op_field_spec)));
+ }
+
+ r->expr_count = count_expressions(&subn, FALSE);
+ return list_append(subn, r);
+ }
+ }
+
+ r->builtin = tokentab[idx].ptr;
+
/* special case processing for a few builtins */
+
if (r->builtin == do_length) {
if (nexp == 0) {
/* no args. Use $0 */
@@ -6338,71 +6416,6 @@ snode(INSTRUCTION *subn, INSTRUCTION *r)
if (/*ip == arg->nexti && */ ip->opcode == Op_push)
ip->opcode = Op_push_array;
}
- } else if (r->builtin == do_sub || r->builtin == do_gsub) {
- int literal = FALSE;
-
- arg = subn->nexti; /* first arg list */
- (void) mk_rexp(arg);
-
- arg = arg->lasti->nexti; /* 2nd arg list */
- if (nexp == 2) {
- INSTRUCTION *expr;
- expr = list_create(instruction(Op_push_i));
- expr->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
- (void) mk_expression_list(subn,
- list_append(expr, instruction(Op_field_spec)));
- }
-
- arg = arg->lasti->nexti; /* third arg list */
- ip = arg->lasti;
- if (ip->opcode == Op_push_i) {
- if (do_lint)
- lintwarn(_("%s: string literal as last arg of substitute has no effect"),
- (r->builtin == do_sub) ? "sub" : "gsub");
- literal = TRUE;
- } else {
- if (make_assignable(ip) == NULL)
- yyerror(_("%s third parameter is not a changeable object"),
- (r->builtin == do_sub) ? "sub" : "gsub");
- else
- ip->do_reference = TRUE;
- }
-
- /* kludge: This is one of the few cases
- * when we need to know the type of item on stack.
- * In case of string literal as the last argument,
- * pass 4 as # of args (See sub_common code in builtin.c).
- * Other cases like length(array or scalar) seem
- * to work out ok.
- */
-
- r->expr_count = count_expressions(&subn, FALSE) + !!literal;
- ip = subn->lasti;
-
- (void) list_append(subn, r);
-
- /* add after_assign bytecode(s) */
- if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) {
- (void) list_append(subn, instruction(Op_var_assign));
- subn->lasti->memory = ip->memory;
- subn->lasti->assign_var = ip->memory->var_assign;
- } else if (ip->opcode == Op_field_spec_lhs) {
- (void) list_append(subn, instruction(Op_field_assign));
- subn->lasti->field_assign = (Func_ptr) 0;
- ip->target_assign = subn->lasti;
- }
- return subn;
- } else if (r->builtin == do_gensub) {
- if (nexp == 3) {
- arg = subn->nexti->lasti->nexti->lasti->nexti; /* 3rd arg list */
- ip = instruction(Op_push_i);
- ip->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
- (void) mk_expression_list(subn,
- list_append(list_create(ip),
- instruction(Op_field_spec)));
- }
- arg = subn->nexti; /* first arg list */
- (void) mk_rexp(arg);
} else if (r->builtin == do_split) {
arg = subn->nexti->lasti->nexti; /* 2nd arg list */
ip = arg->lasti;
diff --git a/awkgram.y b/awkgram.y
index 6b28b520..3ef6382e 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -1795,6 +1795,7 @@ struct token {
# define RESX 0x0800 /* Bell Labs Research extension */
# define BREAK 0x1000 /* break allowed inside */
# define CONTINUE 0x2000 /* continue allowed inside */
+
NODE *(*ptr)(int); /* function that implements this keyword */
};
@@ -1852,9 +1853,9 @@ static const struct token tokentab[] = {
{"for", Op_K_for, LEX_FOR, BREAK|CONTINUE, 0},
{"func", Op_func, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
{"function",Op_func, LEX_FUNCTION, NOT_OLD, 0},
-{"gensub", Op_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub},
+{"gensub", Op_sub_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), 0},
{"getline", Op_K_getline_redir, LEX_GETLINE, NOT_OLD, 0},
-{"gsub", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
+{"gsub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0},
{"if", Op_K_if, LEX_IF, 0, 0},
{"in", Op_symbol, LEX_IN, 0, 0},
{"include", Op_symbol, LEX_INCLUDE, GAWKX, 0},
@@ -1885,7 +1886,7 @@ static const struct token tokentab[] = {
#endif
{"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime},
{"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum},
-{"sub", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
+{"sub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0},
{"substr", Op_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
{"switch", Op_K_switch, LEX_SWITCH, GAWKX|BREAK, 0},
{"system", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
@@ -3596,8 +3597,6 @@ snode(INSTRUCTION *subn, INSTRUCTION *r)
assert(nexp > 0);
}
- r->builtin = tokentab[idx].ptr;
-
/* check against how many args. are allowed for this builtin */
args_allowed = tokentab[idx].flags & ARGS;
if (args_allowed && (args_allowed & A(nexp)) == 0) {
@@ -3606,7 +3605,86 @@ snode(INSTRUCTION *subn, INSTRUCTION *r)
return NULL;
}
+ /* special processing for sub, gsub and gensub */
+
+ if (tokentab[idx].value == Op_sub_builtin) {
+ const char *operator = tokentab[idx].operator;
+
+ r->sub_flags = 0;
+
+ arg = subn->nexti; /* first arg list */
+ (void) mk_rexp(arg);
+
+ if (strcmp(operator, "gensub") != 0) {
+ /* sub and gsub */
+
+ if (strcmp(operator, "gsub") == 0)
+ r->sub_flags |= GSUB;
+
+ arg = arg->lasti->nexti; /* 2nd arg list */
+ if (nexp == 2) {
+ INSTRUCTION *expr;
+
+ expr = list_create(instruction(Op_push_i));
+ expr->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
+ (void) mk_expression_list(subn,
+ list_append(expr, instruction(Op_field_spec)));
+ }
+
+ arg = arg->lasti->nexti; /* third arg list */
+ ip = arg->lasti;
+ if (ip->opcode == Op_push_i) {
+ if (do_lint)
+ lintwarn(_("%s: string literal as last arg of substitute has no effect"),
+ operator);
+ r->sub_flags |= LITERAL;
+ } else {
+ if (make_assignable(ip) == NULL)
+ yyerror(_("%s third parameter is not a changeable object"),
+ operator);
+ else
+ ip->do_reference = TRUE;
+ }
+
+ r->expr_count = count_expressions(&subn, FALSE);
+ ip = subn->lasti;
+
+ (void) list_append(subn, r);
+
+ /* add after_assign code */
+ if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) {
+ (void) list_append(subn, instruction(Op_var_assign));
+ subn->lasti->memory = ip->memory;
+ subn->lasti->assign_var = ip->memory->var_assign;
+ r->sub_flags |= AFTER_ASSIGN;
+ } else if (ip->opcode == Op_field_spec_lhs) {
+ (void) list_append(subn, instruction(Op_field_assign));
+ subn->lasti->field_assign = (Func_ptr) 0;
+ ip->target_assign = subn->lasti;
+ r->sub_flags |= AFTER_ASSIGN;
+ }
+ return subn;
+
+ } else {
+ /* gensub */
+
+ r->sub_flags |= GENSUB;
+ if (nexp == 3) {
+ ip = instruction(Op_push_i);
+ ip->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
+ (void) mk_expression_list(subn,
+ list_append(list_create(ip), instruction(Op_field_spec)));
+ }
+
+ r->expr_count = count_expressions(&subn, FALSE);
+ return list_append(subn, r);
+ }
+ }
+
+ r->builtin = tokentab[idx].ptr;
+
/* special case processing for a few builtins */
+
if (r->builtin == do_length) {
if (nexp == 0) {
/* no args. Use $0 */
@@ -3648,71 +3726,6 @@ snode(INSTRUCTION *subn, INSTRUCTION *r)
if (/*ip == arg->nexti && */ ip->opcode == Op_push)
ip->opcode = Op_push_array;
}
- } else if (r->builtin == do_sub || r->builtin == do_gsub) {
- int literal = FALSE;
-
- arg = subn->nexti; /* first arg list */
- (void) mk_rexp(arg);
-
- arg = arg->lasti->nexti; /* 2nd arg list */
- if (nexp == 2) {
- INSTRUCTION *expr;
- expr = list_create(instruction(Op_push_i));
- expr->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
- (void) mk_expression_list(subn,
- list_append(expr, instruction(Op_field_spec)));
- }
-
- arg = arg->lasti->nexti; /* third arg list */
- ip = arg->lasti;
- if (ip->opcode == Op_push_i) {
- if (do_lint)
- lintwarn(_("%s: string literal as last arg of substitute has no effect"),
- (r->builtin == do_sub) ? "sub" : "gsub");
- literal = TRUE;
- } else {
- if (make_assignable(ip) == NULL)
- yyerror(_("%s third parameter is not a changeable object"),
- (r->builtin == do_sub) ? "sub" : "gsub");
- else
- ip->do_reference = TRUE;
- }
-
- /* kludge: This is one of the few cases
- * when we need to know the type of item on stack.
- * In case of string literal as the last argument,
- * pass 4 as # of args (See sub_common code in builtin.c).
- * Other cases like length(array or scalar) seem
- * to work out ok.
- */
-
- r->expr_count = count_expressions(&subn, FALSE) + !!literal;
- ip = subn->lasti;
-
- (void) list_append(subn, r);
-
- /* add after_assign bytecode(s) */
- if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) {
- (void) list_append(subn, instruction(Op_var_assign));
- subn->lasti->memory = ip->memory;
- subn->lasti->assign_var = ip->memory->var_assign;
- } else if (ip->opcode == Op_field_spec_lhs) {
- (void) list_append(subn, instruction(Op_field_assign));
- subn->lasti->field_assign = (Func_ptr) 0;
- ip->target_assign = subn->lasti;
- }
- return subn;
- } else if (r->builtin == do_gensub) {
- if (nexp == 3) {
- arg = subn->nexti->lasti->nexti->lasti->nexti; /* 3rd arg list */
- ip = instruction(Op_push_i);
- ip->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
- (void) mk_expression_list(subn,
- list_append(list_create(ip),
- instruction(Op_field_spec)));
- }
- arg = subn->nexti; /* first arg list */
- (void) mk_rexp(arg);
} else if (r->builtin == do_split) {
arg = subn->nexti->lasti->nexti; /* 2nd arg list */
ip = arg->lasti;
diff --git a/builtin.c b/builtin.c
index 1d841dc0..5b75979c 100644
--- a/builtin.c
+++ b/builtin.c
@@ -72,7 +72,6 @@ extern NODE **fields_arr;
extern int output_is_tty;
extern FILE *output_fp;
-static NODE *sub_common(int nargs, long how_many, int backdigs);
#define POP_TWO_SCALARS(s1, s2) \
s2 = POP_SCALAR(); \
@@ -2319,7 +2318,7 @@ do_match(int nargs)
return make_number((AWKNUM) rstart);
}
-/* sub_common --- the common code (does the work) for sub, gsub, and gensub */
+/* do_sub --- do the work for sub, gsub, and gensub */
/*
* Gsub can be tricksy; particularly when handling the case of null strings.
@@ -2412,12 +2411,12 @@ do_match(int nargs)
* NB: `howmany' conflicts with a SunOS 4.x macro in <sys/param.h>.
*/
-static NODE *
-sub_common(int nargs, long how_many, int backdigs)
+NODE *
+do_sub(int nargs, unsigned int flags, int *num_matches)
{
char *scan;
char *bp, *cp;
- char *buf;
+ char *buf = NULL;
size_t buflen;
char *matchend;
size_t len;
@@ -2434,38 +2433,77 @@ sub_common(int nargs, long how_many, int backdigs)
NODE *s; /* subst. pattern */
NODE *t; /* string to make sub. in; $0 if none given */
NODE *tmp;
- NODE **lhs;
- int global = (how_many == -1);
+ NODE **lhs = NULL;
+ long how_many = 1; /* one substitution for sub, also gensub default */
+ int global;
long current;
int lastmatchnonzero;
char *mb_indices = NULL;
-
- tmp = PEEK(2); /* take care of regexp early, in case re_update is fatal */
- rp = re_update(tmp);
- /* original string */
- if (nargs == 4) { /* kludge: no of items on stack is really 3,
- * See snode(..) in awkgram.y
- */
- lhs = NULL;
- t = POP_STRING();
+ if ((flags & GENSUB) != 0) {
+ double d;
+ NODE *t1;
+
+ tmp = PEEK(3);
+ rp = re_update(tmp);
+
+ t = POP_STRING(); /* original string */
+
+ t1 = POP_SCALAR(); /* value of global flag */
+ if ((t1->flags & (STRCUR|STRING)) != 0) {
+ if (t1->stlen > 0 && (t1->stptr[0] == 'g' || t1->stptr[0] == 'G'))
+ how_many = -1;
+ else {
+ d = force_number(t1);
+
+ if ((t1->flags & NUMCUR) != 0)
+ goto set_how_many;
+
+ how_many = 1;
+ }
+ } else {
+ d = force_number(t1);
+set_how_many:
+ if (d < 1)
+ how_many = 1;
+ else if (d < LONG_MAX)
+ how_many = d;
+ else
+ how_many = LONG_MAX;
+ if (d == 0)
+ warning(_("gensub: third argument of 0 treated as 1"));
+ }
+ DEREF(t1);
+
} else {
- lhs = POP_ADDRESS();
- t = force_string(*lhs);
+
+ /* take care of regexp early, in case re_update is fatal */
+
+ tmp = PEEK(2);
+ rp = re_update(tmp);
+
+ if ((flags & GSUB) != 0)
+ how_many = -1;
+
+ /* original string */
+
+ if ((flags & LITERAL) != 0)
+ t = POP_STRING();
+ else {
+ lhs = POP_ADDRESS();
+ t = force_string(*lhs);
+ }
}
+ global = (how_many == -1);
- s = POP_STRING(); /* replacement text */
+ s = POP_STRING(); /* replacement text */
decr_sp(); /* regexp, already updated above */
/* do the search early to avoid work on non-match */
if (research(rp, t->stptr, 0, t->stlen, RE_NEED_START) == -1 ||
- RESTART(rp, t->stptr) > t->stlen) {
- if (lhs == NULL)
- DEREF(t);
- DEREF(s);
- return make_number((AWKNUM) 0.0);
- }
+ RESTART(rp, t->stptr) > t->stlen)
+ goto done;
t->flags |= STRING;
@@ -2476,7 +2514,7 @@ sub_common(int nargs, long how_many, int backdigs)
repl = s->stptr;
replend = repl + s->stlen;
repllen = replend - repl;
- emalloc(buf, char *, buflen + 2, "sub_common");
+ emalloc(buf, char *, buflen + 2, "do_sub");
buf[buflen] = '\0';
buf[buflen + 1] = '\0';
ampersands = 0;
@@ -2490,7 +2528,7 @@ sub_common(int nargs, long how_many, int backdigs)
* for example.
*/
if (gawk_mb_cur_max > 1 && repllen > 0) {
- emalloc(mb_indices, char *, repllen * sizeof(char), "sub_common");
+ emalloc(mb_indices, char *, repllen * sizeof(char), "do_sub");
index_multibyte_buffer(repl, mb_indices, repllen);
}
@@ -2500,7 +2538,7 @@ sub_common(int nargs, long how_many, int backdigs)
repllen--;
ampersands++;
} else if (*scan == '\\') {
- if (backdigs) { /* gensub, behave sanely */
+ if (flags & GENSUB) { /* gensub, behave sanely */
if (isdigit((unsigned char) scan[1])) {
ampersands++;
scan++;
@@ -2575,7 +2613,7 @@ sub_common(int nargs, long how_many, int backdigs)
&& (gawk_mb_cur_max == 1
|| (repllen > 0 && mb_indices[scan - repl] == 1))
) {
- if (backdigs) { /* gensub, behave sanely */
+ if (flags & GENSUB) { /* gensub, behave sanely */
if (isdigit((unsigned char) scan[1])) {
int dig = scan[1] - '0';
if (dig < NUMSUBPATS(rp, t->stptr) && SUBPATSTART(rp, tp->stptr, dig) != -1) {
@@ -2619,7 +2657,7 @@ sub_common(int nargs, long how_many, int backdigs)
textlen = text + textlen - matchend;
text = matchend;
- if ((current >= how_many && !global)
+ if ((current >= how_many && ! global)
|| ((long) textlen <= 0 && matchstart == matchend)
|| research(rp, t->stptr, text - t->stptr, textlen, RE_NEED_START) == -1)
break;
@@ -2628,7 +2666,7 @@ sub_common(int nargs, long how_many, int backdigs)
sofar = bp - buf;
if (buflen - sofar - textlen - 1) {
buflen = sofar + textlen + 2;
- erealloc(buf, char *, buflen, "sub_common");
+ erealloc(buf, char *, buflen, "do_sub");
bp = buf + sofar;
}
for (scan = matchend; scan < text + textlen; scan++)
@@ -2636,102 +2674,39 @@ sub_common(int nargs, long how_many, int backdigs)
*bp = '\0';
textlen = bp - buf;
- DEREF(s);
-
- if (lhs != NULL) {
- if (matches > 0) {
- unref(*lhs);
- *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED);
- } else
- efree(buf);
- } else {
- efree(buf);
- DEREF(t);
- }
-
if (mb_indices != NULL)
efree(mb_indices);
- return make_number((AWKNUM) matches);
-}
-
-/* do_gsub --- global substitution */
-
-NODE *
-do_gsub(int nargs)
-{
- return sub_common(nargs, -1, FALSE);
-}
-
-/* do_sub --- single substitution */
-
-NODE *
-do_sub(int nargs)
-{
- return sub_common(nargs, 1, FALSE);
-}
-
-/* do_gensub --- fix up the tree for sub_common for the gensub function */
-
-NODE *
-do_gensub(int nargs)
-{
- NODE *t, *tmp, *target, *ret;
- long how_many = 1; /* default is one substitution */
- double d;
-
- tmp = POP_STRING(); /* target */
- t = POP_SCALAR(); /* value of global flag */
-
- /*
- * We make copy of the original target string, and pass that
- * in to sub_common() as the target to make the substitution in.
- * We will then return the result string as the return value of
- * this function.
- */
-
- target = make_string(tmp->stptr, tmp->stlen);
- DEREF(tmp);
- PUSH_ADDRESS(& target);
-
- if ((t->flags & (STRCUR|STRING)) != 0) {
- if (t->stlen > 0 && (t->stptr[0] == 'g' || t->stptr[0] == 'G'))
- how_many = -1;
- else {
- d = force_number(t);
+done:
+ DEREF(s);
- if ((t->flags & NUMCUR) != 0)
- goto set_how_many;
+ *num_matches = matches;
+ if ((matches == 0 || (flags & LITERAL) != 0) && buf != NULL)
+ efree(buf);
- how_many = 1;
+ if (flags & GENSUB) {
+ if (matches > 0) {
+ /* return the result string */
+ DEREF(t);
+ return make_str_node(buf, textlen, ALREADY_MALLOCED);
}
- } else {
- d = force_number(t);
-set_how_many:
- if (d < 1)
- how_many = 1;
- else if (d < LONG_MAX)
- how_many = d;
- else
- how_many = LONG_MAX;
- if (d == 0)
- warning(_("gensub: third argument of 0 treated as 1"));
- }
-
- DEREF(t);
- ret = sub_common(3, how_many, TRUE);
- unref(ret);
+ /* return the original string */
+ return t;
+ }
- /*
- * Note that we don't care what sub_common() returns, since the
- * easiest thing for the programmer is to return the string, even
- * if no substitutions were done.
- */
+ /* For a string literal, must not change the original string. */
+ if (flags & LITERAL)
+ DEREF(t);
+ else if (matches > 0) {
+ unref(*lhs);
+ *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED);
+ }
- return target;
+ return make_number((AWKNUM) matches);
}
+
/* make_integer - Convert an integer to a number node. */
static NODE *
diff --git a/debug.c b/debug.c
index 9b9db345..404042c1 100644
--- a/debug.c
+++ b/debug.c
@@ -3740,7 +3740,16 @@ print_instruction(INSTRUCTION *pc, Func_print print_func, FILE *fp, int in_dump)
break;
case Op_var_assign:
- print_func(fp, "[set_%s]\n", pc->memory->vname);
+ if (pc->assign_var)
+ print_func(fp, "[set_%s()]", pc->memory->vname);
+ print_func(fp, "\n");
+ break;
+
+ case Op_field_assign:
+ if (pc->field_assign)
+ print_func(fp, "[%s]", pc->field_assign == reset_record ?
+ "reset_record()" : "invalidate_field0()");
+ print_func(fp, "\n");
break;
case Op_field_spec_lhs:
@@ -3830,6 +3839,27 @@ print_instruction(INSTRUCTION *pc, Func_print print_func, FILE *fp, int in_dump)
pc->line_range, pc->target_jmp);
break;
+ case Op_sub_builtin:
+ {
+ const char *fname = "sub";
+ static const struct flagtab values[] = {
+ { GSUB, "GSUB" },
+ { GENSUB, "GENSUB" },
+ { AFTER_ASSIGN, "AFTER_ASSIGN" },
+ { LITERAL, "LITERAL" },
+ { 0, NULL }
+ };
+
+ if (pc->sub_flags & GSUB)
+ fname = "gsub";
+ else if (pc->sub_flags & GENSUB)
+ fname = "gensub";
+ print_func(fp, "%s [arg_count = %ld] [sub_flags = %s]\n",
+ fname, pc->expr_count,
+ genflags2str(pc->sub_flags, values));
+ }
+ break;
+
case Op_builtin:
{
const char *fname = getfname(pc->builtin);
diff --git a/eval.c b/eval.c
index 4132474a..bdbd04bd 100644
--- a/eval.c
+++ b/eval.c
@@ -348,6 +348,7 @@ static struct optypetab {
{ "Op_K_getline", "getline" },
{ "Op_K_nextfile", "nextfile" },
{ "Op_builtin", NULL },
+ { "Op_sub_builtin", NULL },
{ "Op_in_array", " in " },
{ "Op_func_call", NULL },
{ "Op_indirect_func_call", NULL },
@@ -2114,11 +2115,13 @@ post:
break;
case Op_var_assign:
- pc->assign_var();
+ if (pc->assign_var)
+ pc->assign_var();
break;
case Op_field_assign:
- pc->field_assign();
+ if (pc->field_assign)
+ pc->field_assign();
break;
case Op_concat:
@@ -2256,7 +2259,34 @@ arrayfor:
#endif
PUSH(r);
break;
-
+
+ case Op_sub_builtin:
+ {
+ /* sub, gsub and gensub */
+
+ int matches = 0;
+
+ r = do_sub(pc->expr_count, pc->sub_flags, & matches);
+ PUSH(r);
+
+ if (matches == 0 && (pc->sub_flags & AFTER_ASSIGN) != 0) {
+
+ /* For sub and gsub, must not execute after_assign code;
+ * If the target is a FIELD, this means no field re-splitting or
+ * $0 reconstruction. For a special variable as target,
+ * set_XX routine is not called.
+ */
+
+ ni = pc->nexti;
+ assert(ni->opcode == Op_field_assign || ni->opcode == Op_var_assign);
+ if (ni->opcode == Op_field_assign)
+ ni->field_assign = (Func_ptr) 0;
+ else
+ ni->assign_var = (Func_ptr) 0;
+ }
+ }
+ break;
+
case Op_K_print:
do_print(pc->expr_count, pc->redir_type);
break;
diff --git a/profile.c b/profile.c
index cba8be9e..01d1e42f 100644
--- a/profile.c
+++ b/profile.c
@@ -507,6 +507,20 @@ cleanup:
case Op_after_endfile:
break;
+ case Op_sub_builtin:
+ {
+ const char *fname = "sub";
+ if (pc->sub_flags & GSUB)
+ fname = "gsub";
+ else if (pc->sub_flags & GENSUB)
+ fname = "gensub";
+ tmp = pp_list(pc->expr_count, "()", ", ");
+ str = pp_concat(fname, tmp, "");
+ efree(tmp);
+ pp_push(Op_sub_builtin, str, CAN_FREE);
+ }
+ break;
+
case Op_builtin:
{
static char *ext_func = "extension_function()";
diff --git a/test/ChangeLog b/test/ChangeLog
index 2ae225be..60c4f525 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,8 @@
+2011-07-15 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (gsubtst7): New test.
+ * gsubtst7.awk, gsubtst7.in, gsubtst7.ok: New files.
+
2011-06-24 Arnold D. Robbins <arnold@skeeve.com>
* Makefile.am (EXTRA_DIST): Add ChangeLog.0.
diff --git a/test/Makefile.am b/test/Makefile.am
index 2d7bf34f..12f64a62 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -318,6 +318,9 @@ EXTRA_DIST = \
gsubtst5.ok \
gsubtst6.awk \
gsubtst6.ok \
+ gsubtst7.awk \
+ gsubtst7.in \
+ gsubtst7.ok \
gtlnbufv.awk \
hex.awk \
hex.ok \
@@ -768,6 +771,7 @@ BASIC_TESTS = \
fordel forref forsimp fsbs fsrs fsspcoln fstabplus funsemnl funsmnam \
funstack getline getline2 getline3 getlnbuf getnr2tb getnr2tm \
gsubasgn gsubtest gsubtst2 gsubtst3 gsubtst4 gsubtst5 gsubtst6 \
+ gsubtst7 \
hex hsprint inputred intest intprec iobug1 leaddig leadnl litoct \
longsub longwrds manglprm math membug1 messages minusstr mmap8k \
mtchi18n nasty nasty2 negexp negrange nested nfldstr nfneg \
diff --git a/test/Makefile.in b/test/Makefile.in
index 04ea041c..eee32eba 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -503,6 +503,9 @@ EXTRA_DIST = \
gsubtst5.ok \
gsubtst6.awk \
gsubtst6.ok \
+ gsubtst7.awk \
+ gsubtst7.in \
+ gsubtst7.ok \
gtlnbufv.awk \
hex.awk \
hex.ok \
@@ -953,6 +956,7 @@ BASIC_TESTS = \
fordel forref forsimp fsbs fsrs fsspcoln fstabplus funsemnl funsmnam \
funstack getline getline2 getline3 getlnbuf getnr2tb getnr2tm \
gsubasgn gsubtest gsubtst2 gsubtst3 gsubtst4 gsubtst5 gsubtst6 \
+ gsubtst7 \
hex hsprint inputred intest intprec iobug1 leaddig leadnl litoct \
longsub longwrds manglprm math membug1 messages minusstr mmap8k \
mtchi18n nasty nasty2 negexp negrange nested nfldstr nfneg \
@@ -2077,6 +2081,11 @@ gsubtst5:
@AWKPATH=$(srcdir) $(AWK) -f $@.awk < $(srcdir)/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
+gsubtst7:
+ @echo gsubtst7
+ @AWKPATH=$(srcdir) $(AWK) -f $@.awk < $(srcdir)/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
+
hex:
@echo hex
@AWKPATH=$(srcdir) $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/Maketests b/test/Maketests
index 9a16eb7c..9f364038 100644
--- a/test/Maketests
+++ b/test/Maketests
@@ -350,6 +350,11 @@ gsubtst5:
@AWKPATH=$(srcdir) $(AWK) -f $@.awk < $(srcdir)/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
+gsubtst7:
+ @echo gsubtst7
+ @AWKPATH=$(srcdir) $(AWK) -f $@.awk < $(srcdir)/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
+
hex:
@echo hex
@AWKPATH=$(srcdir) $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@