summary refs log tree commit diff
path: root/awkgram.y
diff options
context:
space:
mode:
Diffstat (limited to 'awkgram.y')
-rw-r--r-- awkgram.y 5395
1 files changed, 3748 insertions, 1647 deletions
diff --git a/awkgram.y b/awkgram.y
index f58e0d5b..1247cfd9 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -34,64 +34,103 @@
#define signed /**/
#endif
-#define CAN_FREE TRUE
-#define DONT_FREE FALSE
-
-#ifdef CAN_USE_STDARG_H
static void yyerror(const char *m, ...) ATTRIBUTE_PRINTF_1;
-#else
-static void yyerror(); /* va_alist */
-#endif
-static char *get_src_buf P((void));
-static int yylex P((void));
-static NODE *node_common P((NODETYPE op));
-static NODE *snode P((NODE *subn, NODETYPE op, int sindex));
-static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr));
-static NODE *append_right P((NODE *list, NODE *new));
-static inline NODE *append_pattern P((NODE **list, NODE *patt));
-static void func_install P((NODE *params, NODE *def));
-static void pop_var P((NODE *np, int freeit));
-static void pop_params P((NODE *params));
-static NODE *make_param P((char *name));
-static NODE *mk_rexp P((NODE *exp));
-static int dup_parms P((NODE *func));
-static void param_sanity P((NODE *arglist));
-static int parms_shadow P((const char *fname, NODE *func));
-static int isnoeffect P((NODETYPE t));
-static int isassignable P((NODE *n));
-static void dumpintlstr P((const char *str, size_t len));
-static void dumpintlstr2 P((const char *str1, size_t len1, const char *str2, size_t len2));
-static void count_args P((NODE *n));
-static int isarray P((NODE *n));
+static char *get_src_buf(void);
+static int yylex(void);
+int yyparse(void);
+static INSTRUCTION *snode(INSTRUCTION *subn, INSTRUCTION *op);
+static int func_install(INSTRUCTION *fp, INSTRUCTION *def);
+static void pop_params(NODE *params);
+static NODE *make_param(char *pname);
+static NODE *mk_rexp(INSTRUCTION *exp);
+static void append_param(char *pname);
+static int dup_parms(NODE *func);
+static void param_sanity(INSTRUCTION *arglist);
+static int parms_shadow(INSTRUCTION *pc, int *shadow);
+static int isnoeffect(OPCODE type);
+static INSTRUCTION *make_assignable(INSTRUCTION *ip);
+static void dumpintlstr(const char *str, size_t len);
+static void dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2);
+static int isarray(NODE *n);
+static int include_source(char *src);
+static void next_sourcefile(void);
+static char *tokexpand(void);
+
+#define instruction(t) bcalloc(t, 1, 0)
+
+static INSTRUCTION *mk_program(void);
+static INSTRUCTION *append_rule(INSTRUCTION *pattern, INSTRUCTION *action);
+static INSTRUCTION *mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch,
+ INSTRUCTION *elsep, INSTRUCTION *false_branch);
+static INSTRUCTION *mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1);
+static INSTRUCTION *mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond,
+ INSTRUCTION *incr, INSTRUCTION *body);
+static void fix_break_continue(INSTRUCTION *start, INSTRUCTION *end, int check_continue);
+static INSTRUCTION *mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op);
+static INSTRUCTION *mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op);
+static INSTRUCTION *mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op);
+static INSTRUCTION *mk_getline(INSTRUCTION *op, INSTRUCTION *opt_var, INSTRUCTION *redir, OPCODE redirtype);
+static NODE *make_regnode(int type, NODE *exp);
+static int count_expressions(INSTRUCTION **list, int isarg);
+static INSTRUCTION *optimize_assignment(INSTRUCTION *exp);
+static void add_lint(INSTRUCTION *list, LINTTYPE linttype);
enum defref { FUNC_DEFINE, FUNC_USE };
-static void func_use P((const char *name, enum defref how));
-static void check_funcs P((void));
+static void func_use(const char *name, enum defref how);
+static void check_funcs(void);
+static void free_bcpool(INSTRUCTION *pl);
-static ssize_t read_one_line P((int fd, void *buffer, size_t count));
-static int one_line_close P((int fd));
+static ssize_t read_one_line(int fd, void *buffer, size_t count);
+static int one_line_close(int fd);
-static NODE *constant_fold P((NODE *left, NODETYPE op, NODE *right));
-static NODE *optimize_concat P((NODE *left, NODETYPE op, NODE *right));
+static void (*install_func)(char *) = NULL;
+static int want_source = FALSE;
static int want_regexp; /* lexical scanning kludge */
static int can_return; /* parsing kludge */
-static int begin_or_end_rule = FALSE; /* parsing kludge */
-static int parsing_end_rule = FALSE; /* for warnings */
-static int beginfile_or_endfile_rule = FALSE; /* parsing kludge */
-static int parsing_endfile_rule = FALSE; /* for warnings */
+static int rule = 0;
+
+const char *const ruletab[] = {
+ "?",
+ "BEGIN",
+ "Rule",
+ "END",
+ "BEGINFILE",
+ "ENDFILE",
+};
+
static int in_print = FALSE; /* lexical scanning kludge for print */
static int in_parens = 0; /* lexical scanning kludge for print */
-static char *lexptr; /* pointer to next char during parsing */
+static int sub_counter = 0; /* array dimension counter for use in delete */
+static char *lexptr = NULL; /* pointer to next char during parsing */
static char *lexend;
static char *lexptr_begin; /* keep track of where we were for error msgs */
static char *lexeme; /* beginning of lexeme for debugging */
+static int lexeof; /* seen EOF for current source? */
static char *thisline = NULL;
+static int in_braces = 0; /* count braces for firstline, lastline in an 'action' */
+static int lastline = 0;
+static int firstline = 0;
+static SRCFILE *sourcefile = NULL; /* current program source */
+static int lasttok = 0;
+static int eof_warned = FALSE; /* GLOBAL: want warning for each file */
+static int break_allowed; /* kludge for break */
+static int continue_allowed; /* kludge for continue */
+
+
+#define END_FILE -1000
+#define END_SRC -2000
+
#define YYDEBUG_LEXER_TEXT (lexeme)
static int param_counter;
+static NODE *func_params; /* list of parameters for the current function */
static char *tokstart = NULL;
static char *tok = NULL;
static char *tokend;
+static int errcount = 0;
+
+static NODE *symbol_list;
+extern void destroy_symbol(char *name);
static long func_count; /* total number of functions */
@@ -101,14 +140,25 @@ static int var_count; /* total number of global variables */
extern char *source;
extern int sourceline;
-extern struct src *srcfiles;
-extern long numfiles;
-extern int errcount;
-extern NODE *begin_block;
-extern NODE *end_block;
-extern NODE *beginfile_block;
-extern NODE *endfile_block;
+extern SRCFILE *srcfiles;
+extern INSTRUCTION *rule_list;
+extern int max_args;
+
+static INSTRUCTION *rule_block[sizeof(ruletab)];
+
+static INSTRUCTION *ip_rec;
+static INSTRUCTION *ip_newfile;
+static INSTRUCTION *ip_atexit = NULL;
+static INSTRUCTION *ip_end;
+static INSTRUCTION *ip_endfile;
+static INSTRUCTION *ip_beginfile;
+static inline INSTRUCTION *list_create(INSTRUCTION *x);
+static inline INSTRUCTION *list_append(INSTRUCTION *l, INSTRUCTION *x);
+static inline INSTRUCTION *list_prepend(INSTRUCTION *l, INSTRUCTION *x);
+static inline INSTRUCTION *list_merge(INSTRUCTION *l1, INSTRUCTION *l2);
+
+extern double fmod(double x, double y);
/*
* This string cannot occur as a real awk identifier.
* Use it as a special token to make function parsing
@@ -119,48 +169,27 @@ extern NODE *endfile_block;
* should only produce one error message, and not core dump.
*/
static char builtin_func[] = "@builtin";
+
+#define YYSTYPE INSTRUCTION *
%}
-%union {
- long lval;
- AWKNUM fval;
- NODE *nodeval;
- NODETYPE nodetypeval;
- char *sval;
- NODE *(*ptrval) P((void));
-}
-
-%type <nodeval> function_prologue pattern action variable param_list
-%type <nodeval> exp common_exp
-%type <nodeval> simp_exp simp_exp_nc non_post_simp_exp
-%type <nodeval> expression_list opt_expression_list print_expression_list
-%type <nodeval> statements statement if_statement switch_body case_statements case_statement case_value opt_param_list
-%type <nodeval> simple_stmt opt_simple_stmt
-%type <nodeval> opt_exp opt_variable regexp
-%type <nodeval> input_redir output_redir field_spec
-%type <nodeval> function_call direct_function_call
-%type <nodetypeval> print
-%type <nodetypeval> assign_operator a_relop relop_or_less
-%type <sval> func_name opt_incdec
-%type <lval> lex_builtin
-
-%token <sval> FUNC_CALL NAME REGEXP
-%token <lval> ERROR
-%token <nodeval> YNUMBER YSTRING
-%token <nodetypeval> RELOP IO_OUT IO_IN
-%token <nodetypeval> ASSIGNOP ASSIGN MATCHOP CONCAT_OP
-%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
-%token <nodetypeval> LEX_BEGINFILE LEX_ENDFILE
-%token <nodetypeval> LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
-%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
-%token <nodetypeval> LEX_GETLINE LEX_NEXTFILE
-%token <nodetypeval> LEX_IN
-%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT
-%token <lval> LEX_BUILTIN LEX_LENGTH
+%token FUNC_CALL NAME REGEXP FILENAME
+%token YNUMBER YSTRING
+%token RELOP IO_OUT IO_IN
+%token ASSIGNOP ASSIGN MATCHOP CONCAT_OP
+%token SUBSCRIPT
+%token LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
+%token LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
+%token LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
+%token LEX_BEGINFILE LEX_ENDFILE
+%token LEX_GETLINE LEX_NEXTFILE
+%token LEX_IN
+%token LEX_AND LEX_OR INCREMENT DECREMENT
+%token LEX_BUILTIN LEX_LENGTH
+%token LEX_EOF
+%token LEX_INCLUDE LEX_EVAL
%token NEWLINE
-/* these are just yylval numbers */
-
/* Lowest to highest */
%right ASSIGNOP ASSIGN SLASH_BEFORE_EQUAL
%right '?' ':'
@@ -170,7 +199,7 @@ static char builtin_func[] = "@builtin";
%nonassoc LEX_IN
%left FUNC_CALL LEX_BUILTIN LEX_LENGTH
%nonassoc ','
-%nonassoc MATCHOP
+%left MATCHOP
%nonassoc RELOP '<' '>' IO_IN IO_OUT
%left CONCAT_OP
%left YSTRING YNUMBER
@@ -183,81 +212,106 @@ static char builtin_func[] = "@builtin";
%left '(' ')'
%%
-start
- : opt_nls program opt_nls
- {
- check_funcs();
- }
- ;
-
program
: /* empty */
| program rule
{
- beginfile_or_endfile_rule = begin_or_end_rule = parsing_end_rule = FALSE;
+ rule = 0;
yyerrok;
}
+ | program nls
+ | program LEX_EOF
+ {
+ next_sourcefile();
+ }
| program error
- {
- beginfile_or_endfile_rule = begin_or_end_rule = parsing_end_rule = FALSE;
+ {
+ rule = 0;
/*
* If errors, give up, don't produce an infinite
* stream of syntax error messages.
*/
/* yyerrok; */
- }
+ }
;
rule
: pattern action
{
- $1->rnode = $2;
+ (void) append_rule($1, $2);
}
| pattern statement_term
{
- if ($1->lnode != NULL) {
- /* pattern rule with non-empty pattern */
- $1->rnode = node(NULL, Node_K_print_rec, NULL);
- } else {
- /* an error */
- if (begin_or_end_rule)
- msg(_("%s blocks must have an action part"),
- (parsing_end_rule ? "END" : "BEGIN"));
- else if (beginfile_or_endfile_rule)
- msg(_("%s blocks must have an action part"),
- (parsing_endfile_rule ? "ENDFILE" : "BEGINFILE"));
- else
- msg(_("each rule must have a pattern or an action part"));
+ if (rule != Rule) {
+ msg(_("%s blocks must have an action part"), ruletab[rule]);
errcount++;
- }
+ } else if ($1 == NULL) {
+ msg(_("each rule must have a pattern or an action part"));
+ errcount++;
+ } else /* pattern rule with non-empty pattern */
+ (void) append_rule($1, NULL);
}
| function_prologue action
{
can_return = FALSE;
- if ($1)
- func_install($1, $2);
+ if ($1 && func_install($1, $2) < 0)
+ YYABORT;
+ func_params = NULL;
+ yyerrok;
+ }
+ | '@' LEX_INCLUDE source statement_term
+ {
+ want_source = FALSE;
yyerrok;
}
;
-pattern
- : /* empty */
+source
+ : FILENAME
{
- $$ = append_pattern(&expression_value, (NODE *) NULL);
+ char *src = $1->lextok;
+ if (include_source(src) < 0)
+ YYABORT;
+ efree(src);
+ bcfree($1);
+ $$ = NULL;
}
+ | FILENAME error
+ { $$ = NULL; }
+ | error
+ { $$ = NULL; }
+ ;
+
+pattern
+ : /* empty */
+ { $$ = NULL; rule = Rule; }
| exp
- {
- $$ = append_pattern(&expression_value, $1);
- }
+ { $$ = $1; rule = Rule; }
| exp ',' exp
{
- NODE *r;
-
- getnode(r);
- r->type = Node_line_range;
- r->condpair = node($1, Node_cond_pair, $3);
- r->triggered = FALSE;
- $$ = append_pattern(&expression_value, r);
+ INSTRUCTION *tp;
+
+ add_lint($1, LINT_assign_in_cond);
+ add_lint($3, LINT_assign_in_cond);
+
+ tp = instruction(Op_no_op);
+ list_prepend($1, bcalloc(Op_line_range, !!do_profiling + 1, 0));
+ $1->nexti->triggered = FALSE;
+ $1->nexti->target_jmp = $3->nexti;
+
+ list_append($1, instruction(Op_cond_pair));
+ $1->lasti->line_range = $1->nexti;
+ $1->lasti->target_jmp = tp;
+
+ list_append($3, instruction(Op_cond_pair));
+ $3->lasti->line_range = $1->nexti;
+ $3->lasti->target_jmp = tp;
+ if (do_profiling) {
+ ($1->nexti + 1)->condpair_left = $1->lasti;
+ ($1->nexti + 1)->condpair_right = $3->lasti;
+ }
+ $$ = list_append(list_merge($1, $3), tp);
+ rule = Rule;
}
| LEX_BEGIN
{
@@ -265,8 +319,9 @@ pattern
if (do_lint_old && ++begin_seen == 2)
warning(_("old awk does not support multiple `BEGIN' or `END' rules"));
- begin_or_end_rule = TRUE;
- $$ = append_pattern(&begin_block, (NODE *) NULL);
+ $1->in_rule = rule = BEGIN;
+ $1->source_file = source;
+ $$ = $1;
}
| LEX_END
{
@@ -274,39 +329,52 @@ pattern
if (do_lint_old && ++end_seen == 2)
warning(_("old awk does not support multiple `BEGIN' or `END' rules"));
- begin_or_end_rule = parsing_end_rule = TRUE;
- $$ = append_pattern(&end_block, (NODE *) NULL);
+ $1->in_rule = rule = END;
+ $1->source_file = source;
+ $$ = $1;
}
| LEX_BEGINFILE
{
- beginfile_or_endfile_rule = TRUE;
- $$ = append_pattern(&beginfile_block, (NODE *) NULL);
+ $1->in_rule = rule = BEGINFILE;
+ $1->source_file = source;
+ $$ = $1;
}
| LEX_ENDFILE
{
- beginfile_or_endfile_rule = parsing_endfile_rule = TRUE;
- $$ = append_pattern(&endfile_block, (NODE *) NULL);
+ $1->in_rule = rule = ENDFILE;
+ $1->source_file = source;
+ $$ = $1;
}
;
action
: l_brace statements r_brace opt_semi opt_nls
- { $$ = $2; }
+ {
+ if ($2 == NULL)
+ $$ = list_create(instruction(Op_no_op));
+ else
+ $$ = $2;
+ }
;
func_name
: NAME
- { $$ = $1; }
+ { $$ = $1; }
| FUNC_CALL
- { $$ = $1; }
+ { $$ = $1; }
| lex_builtin
{
yyerror(_("`%s' is a built-in function, it cannot be redefined"),
tokstart);
- errcount++;
- $$ = builtin_func;
+ $1->opcode = Op_symbol; /* Op_symbol instead of Op_token so that
+ * free_bc_internal does not try to free it
+ */
+ $1->lextok = builtin_func;
+ $$ = $1;
/* yyerrok; */
}
+ | '@' LEX_EVAL
+ { $$ = $2; }
;
lex_builtin
@@ -315,20 +383,26 @@ lex_builtin
;
function_prologue
- : LEX_FUNCTION
- {
- param_counter = 0;
- }
- func_name '(' opt_param_list r_paren opt_nls
- {
+ : LEX_FUNCTION
+ {
+ param_counter = 0;
+ func_params = NULL;
+ }
+ func_name '(' opt_param_list r_paren opt_nls
+ {
NODE *t;
- t = make_param($3);
+ $1->source_file = source;
+ t = make_param($3->lextok);
+ $3->lextok = NULL;
+ bcfree($3);
t->flags |= FUNC;
- $$ = append_right(t, $5);
+ t->rnode = func_params;
+ func_params = t;
+ $$ = $1;
can_return = TRUE;
/* check for duplicate parameter names */
- if (dup_parms($$))
+ if (dup_parms(t))
errcount++;
}
;
@@ -342,55 +416,56 @@ regexp
{ ++want_regexp; }
REGEXP /* The terminating '/' is consumed by yylex(). */
{
- NODE *n;
- size_t len = strlen($3);
+ NODE *n, *exp;
+ char *re;
+ size_t len;
+ re = $3->lextok;
+ len = strlen(re);
if (do_lint) {
if (len == 0)
lintwarn(_("regexp constant `//' looks like a C++ comment, but is not"));
- else if (($3)[0] == '*' && ($3)[len-1] == '*')
+ else if ((re)[0] == '*' && (re)[len-1] == '*')
/* possible C comment */
lintwarn(_("regexp constant `/%s/' looks like a C comment, but is not"), tokstart);
}
- getnode(n);
- n->type = Node_regex;
- n->re_exp = make_string($3, len);
- n->re_reg = make_regexp($3, len, FALSE, TRUE);
- n->re_text = NULL;
- n->re_flags = CONSTANT;
- n->re_cnt = 1;
- $$ = n;
+
+ exp = make_str_node(re, len, ALREADY_MALLOCED);
+ n = make_regnode(Node_regex, exp);
+ if (n == NULL) {
+ unref(exp);
+ YYABORT;
+ }
+ $$ = $3;
+ $$->opcode = Op_match_rec;
+ $$->memory = n;
}
;
a_slash
: '/'
+ { bcfree($1); }
| SLASH_BEFORE_EQUAL
;
statements
: /* empty */
- { $$ = NULL; }
+ { $$ = NULL; }
| statements statement
{
if ($2 == NULL)
$$ = $1;
else {
- if (do_lint && isnoeffect($2->type))
- lintwarn(_("statement may have no effect"));
+ add_lint($2, LINT_no_effect);
if ($1 == NULL)
$$ = $2;
else
- $$ = append_right(
- ($1->type == Node_statement_list ? $1
- : node($1, Node_statement_list, (NODE *) NULL)),
- ($2->type == Node_statement_list ? $2
- : node($2, Node_statement_list, (NODE *) NULL)));
+ $$ = list_merge($1, $2);
}
- yyerrok;
+ yyerrok;
}
| statements error
- { $$ = NULL; }
+ { $$ = NULL; }
;
statement_term
@@ -400,21 +475,140 @@ statement_term
statement
: semi opt_nls
- { $$ = NULL; }
+ { $$ = NULL; }
| l_brace statements r_brace
- { $$ = $2; }
+ { $$ = $2; }
| if_statement
- { $$ = $1; }
+ {
+ if (do_profiling)
+ $$ = list_prepend($1, instruction(Op_exec_count));
+ else
+ $$ = $1;
+ }
| LEX_SWITCH '(' exp r_paren opt_nls l_brace switch_body opt_nls r_brace
- { $$ = node($3, Node_K_switch, $7); }
+ {
+ INSTRUCTION *ip;
+
+ $1->opcode = Op_push_loop;
+ $1->target_continue = NULL;
+ ip = list_prepend($3, $1);
+ if ($7->nexti->switch_dflt == NULL)
+ $7->nexti->switch_dflt = $1; /* implicit break */
+ if (do_profiling) {
+ (void) list_prepend(ip, instruction(Op_exec_count));
+ ($1 + 1)->opcode = Op_K_switch;
+ ($1 + 1)->switch_body = $7->nexti;
+ }
+ (void) list_merge(ip, $7);
+ $$ = list_append(ip, instruction(Op_pop_loop));
+ $1->target_break = $$->lasti;
+
+ break_allowed--;
+ fix_break_continue($1, $$->lasti, FALSE);
+ }
| LEX_WHILE '(' exp r_paren opt_nls statement
- { $$ = node($3, Node_K_while, $6); }
+ {
+ /*
+ * [Op_push_loop| z| y]
+ * -----------------
+ * z:
+ * cond
+ * -----------------
+ * [Op_jmp_false y ]
+ * -----------------
+ * body
+ * -----------------
+ * [Op_jmp z ]
+ * y: [Op_pop_loop ]
+ */
+
+ INSTRUCTION *ip, *tp;
+
+ tp = instruction(Op_pop_loop);
+
+ add_lint($3, LINT_assign_in_cond);
+ $1->opcode = Op_push_loop;
+ $1->target_continue = $3->nexti;
+ $1->target_break = tp;
+ ip = list_create($1);
+
+ (void) list_merge(ip, $3);
+ (void) list_append(ip, instruction(Op_jmp_false));
+ ip->lasti->target_jmp = tp;
+
+ if (do_profiling) {
+ (void) list_append(ip, instruction(Op_exec_count));
+ ($1 + 1)->opcode = Op_K_while;
+ ($1 + 1)->while_body = ip->lasti;
+ }
+
+ if ($6 != NULL)
+ (void) list_merge(ip, $6);
+ (void) list_append(ip, instruction(Op_jmp));
+ ip->lasti->target_jmp = $1->target_continue;
+ $$ = list_append(ip, tp);
+
+ break_allowed--;
+ continue_allowed--;
+ fix_break_continue($1, tp, TRUE);
+ }
| LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
- { $$ = node($6, Node_K_do, $3); }
- | LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement
{
- /*
- * Efficiency hack. Recognize the special case of
+ /*
+ * [Op_push_loop | x | y]
+ * -----------------
+ * z:
+ * body
+ * -----------------
+ * x:
+ * cond
+ * -----------------
+ * [Op_jmp_true | z ]
+ * y: [Op_pop_loop ]
+ */
+
+ INSTRUCTION *ip;
+
+ $4->opcode = Op_pop_loop;
+ $1->opcode = Op_push_loop;
+ $1->target_continue = $6->nexti;
+ $1->target_break = $4;
+
+ add_lint($6, LINT_assign_in_cond);
+ if ($3 != NULL)
+ ip = list_merge($3, $6);
+ else
+ ip = list_prepend($6, instruction(Op_no_op));
+
+ if (do_profiling) {
+ (void) list_prepend(ip, instruction(Op_exec_count));
+ ($1 + 1)->opcode = Op_K_do;
+ ($1 + 1)->doloop_cond = $1->target_continue;
+ }
+
+ (void) list_append(ip, instruction(Op_jmp_true));
+ ip->lasti->target_jmp = ip->nexti;
+ $$ = list_prepend(ip, $1);
+ (void) list_append(ip, $4);
+
+ break_allowed--;
+ continue_allowed--;
+ fix_break_continue($1, $4, TRUE);
+ }
+ | LEX_FOR '(' NAME LEX_IN simple_variable r_paren opt_nls statement
+ {
+
+ char *var_name = $3->lextok;
+
+ if ($8 != NULL
+ && $8->lasti->opcode == Op_K_delete
+ && $8->lasti->expr_count == 1
+ && $8->nexti->opcode == Op_push
+ && ($8->nexti->memory->type != Node_var || !($8->nexti->memory->var_update))
+ && strcmp($8->nexti->memory->vname, var_name) == 0
+ ) {
+
+ /* Efficiency hack. Recognize the special case of
*
* for (iggy in foo)
* delete foo[iggy]
@@ -425,113 +619,192 @@ statement
*
* Check that the body is a `delete a[i]' statement,
* and that both the loop var and array names match.
- */
- if ($8 != NULL && $8->type == Node_K_delete && $8->rnode != NULL) {
- NODE *arr, *sub;
-
- assert($8->rnode->type == Node_expression_list);
- arr = $8->lnode; /* array var */
- sub = $8->rnode->lnode; /* index var */
-
- if ( (arr->type == Node_var_new
- || arr->type == Node_var_array
- || arr->type == Node_param_list)
- && (sub->type == Node_var_new
- || sub->type == Node_var
- || sub->type == Node_param_list)
- && strcmp($3, sub->vname) == 0
- && strcmp($5, arr->vname) == 0) {
- $8->type = Node_K_delete_loop;
+ */
+ NODE *arr = NULL;
+ INSTRUCTION *ip = $8->nexti->nexti;
+
+ if ($5->nexti->opcode == Op_push && $5->lasti == $5->nexti)
+ arr = $5->nexti->memory;
+ if (arr != NULL
+ && ip->opcode == Op_no_op
+ && ip->nexti->opcode == Op_push_array
+ && strcmp(ip->nexti->memory->vname, arr->vname) == 0
+ && ip->nexti->nexti == $8->lasti
+ ) {
+ (void) make_assignable($8->nexti);
+ $8->lasti->opcode = Op_K_delete_loop;
+ $8->lasti->expr_count = 0;
+ bcfree($1);
+ efree(var_name);
+ bcfree($3);
+ bcfree($4);
+ bcfree($5);
$$ = $8;
- free($3); /* thanks to valgrind for pointing these out */
- free($5);
}
- else
- goto regular_loop;
} else {
- regular_loop:
- $$ = node($8, Node_K_arrayfor,
- make_for_loop(variable($3, CAN_FREE, Node_var),
- (NODE *) NULL, variable($5, CAN_FREE, Node_var_array)));
- }
+
+ /* [ Op_push_array a ]
+ * [ Op_arrayfor_init| w ]
+ * [ Op_push_loop | z | y ]
+ * z: [ Op_arrayfor_incr | y ]
+ * [ Op_var_assign if any ]
+ *
+ * body
+ *
+ * [Op_jmp | z ]
+ * y: [Op_pop_loop ]
+ * w: [Op_arrayfor_final ]
+ */
+ INSTRUCTION *ip;
+
+ ip = $5;
+ ip->nexti->opcode = Op_push_array;
+ $3->opcode = Op_arrayfor_init;
+ (void) list_append(ip, $3);
+
+ $4->opcode = Op_arrayfor_incr;
+ $4->array_var = variable(var_name, Node_var);
+ $1->opcode = Op_push_loop;
+ $1->target_continue = $4;
+
+ (void) list_append(ip, $1);
+
+ /* add update_FOO instruction if necessary */
+ if ($4->array_var->type == Node_var && $4->array_var->var_update) {
+ (void) list_append(ip, instruction(Op_var_update));
+ ip->lasti->memory = $4->array_var;
+ }
+ (void) list_append(ip, $4);
+
+ /* add set_FOO instruction if necessary */
+ if ($4->array_var->type == Node_var && $4->array_var->var_assign) {
+ (void) list_append(ip, instruction(Op_var_assign));
+ ip->lasti->memory = $4->array_var;
+ }
+
+ if (do_profiling) {
+ (void) list_append(ip, instruction(Op_exec_count));
+ ($1 + 1)->opcode = Op_K_arrayfor;
+ ($1 + 1)->forloop_cond = $4;
+ ($1 + 1)->forloop_body = ip->lasti;
+ }
+
+ if ($8 != NULL)
+ (void) list_merge(ip, $8);
+
+ (void) list_append(ip, instruction(Op_jmp));
+ ip->lasti->target_jmp = $4;
+ (void) list_append(ip, instruction(Op_pop_loop));
+ $4->target_jmp = $1->target_break = ip->lasti;
+ $$ = list_append(ip, instruction(Op_arrayfor_final));
+ $3->target_jmp = $$->lasti;
+
+ fix_break_continue($1, $4->target_jmp, TRUE);
+ }
+
+ break_allowed--;
+ continue_allowed--;
}
| LEX_FOR '(' opt_simple_stmt semi opt_nls exp semi opt_nls opt_simple_stmt r_paren opt_nls statement
{
- $$ = node($12, Node_K_for, (NODE *) make_for_loop($3, $6, $9));
+ $$ = mk_for_loop($1, $3, $6, $9, $12);
+
+ break_allowed--;
+ continue_allowed--;
}
| LEX_FOR '(' opt_simple_stmt semi opt_nls semi opt_nls opt_simple_stmt r_paren opt_nls statement
{
- $$ = node($11, Node_K_for,
- (NODE *) make_for_loop($3, (NODE *) NULL, $8));
+ $$ = mk_for_loop($1, $3, (INSTRUCTION *) NULL, $8, $11);
+
+ break_allowed--;
+ continue_allowed--;
+ }
+ | non_compound_stmt
+ {
+ if (do_profiling)
+ $$ = list_prepend($1, instruction(Op_exec_count));
+ else
+ $$ = $1;
+ }
+ ;
+
+non_compound_stmt
+ : LEX_BREAK statement_term
+ {
+ if (! break_allowed)
+ yyerror(_("`break' is not allowed outside a loop or switch"));
+
+ $1->target_jmp = NULL;
+ $$ = list_create($1);
+
}
- | LEX_BREAK statement_term
- /* for break, maybe we'll have to remember where to break to */
- { $$ = node((NODE *) NULL, Node_K_break, (NODE *) NULL); }
| LEX_CONTINUE statement_term
- /* similarly */
- { $$ = node((NODE *) NULL, Node_K_continue, (NODE *) NULL); }
- | LEX_NEXT statement_term
- { NODETYPE type;
+ {
+ if (! continue_allowed)
+ yyerror(_("`continue' is not allowed outside a loop"));
- if (begin_or_end_rule)
- yyerror(_("`%s' used in %s action"), "next",
- (parsing_end_rule ? "END" : "BEGIN"));
- else if (beginfile_or_endfile_rule)
- yyerror(_("`%s' used in %s action"), "next",
- (parsing_endfile_rule ? "ENDFILE" : "BEGINFILE"));
- type = Node_K_next;
- $$ = node((NODE *) NULL, type, (NODE *) NULL);
- }
+ $1->target_jmp = NULL;
+ $$ = list_create($1);
+
+ }
+ | LEX_NEXT statement_term
+ {
+ if (rule != Rule)
+ yyerror(_("`next' used in %s action"), ruletab[rule]);
+ $1->target_jmp = ip_rec;
+ $$ = list_create($1);
+ }
| LEX_NEXTFILE statement_term
- {
- static short warned = FALSE;
+ {
+ static short warned = FALSE;
- if (do_traditional) {
- /*
- * can't use yyerror, since may have overshot
- * the source line
- */
+ if (do_traditional) {
+ /*
+ * can't use yyerror, since may have overshot
+ * the source line
+ */
errcount++;
error(_("`nextfile' is a gawk extension"));
- }
- if (do_lint && ! warned) {
- warned = TRUE;
+ }
+ if (do_lint && ! warned) {
+ warned = TRUE;
lintwarn(_("`nextfile' is a gawk extension"));
- }
- if (begin_or_end_rule) {
- /* same thing */
- errcount++;
- error(_("`%s' used in %s action"), "nextfile",
- (parsing_end_rule ? "END" : "BEGIN"));
- }
-#if 0
- else if (beginfile_or_endfile_rule) {
- /* same thing */
- errcount++;
- error(_("`%s' used in %s action"), "nextfile",
- (parsing_endfile_rule ? "END" : "BEGIN"));
- }
-#endif
- else if (parsing_endfile_rule) {
- /* same thing */
+ }
+ if (rule == BEGIN || rule == END || rule == ENDFILE) {
errcount++;
- error(_("`%s' used in %s action"), "nextfile",
- (parsing_endfile_rule ? "ENDFILE" : "BEGINFILE"));
- }
- $$ = node((NODE *) NULL, Node_K_nextfile, (NODE *) NULL);
+ error(_("`nextfile' used in %s action"), ruletab[rule]);
}
+
+ $1->target_jmp = ip_newfile;
+ $1->target_endfile = ip_endfile;
+ $$ = list_create($1);
+ }
| LEX_EXIT opt_exp statement_term
- { $$ = node($2, Node_K_exit, (NODE *) NULL); }
+ {
+ if (rule == END)
+ $1->target_jmp = ip_atexit;
+ else
+ $1->target_jmp = ip_end; /* first instruction (no-op) in end block */
+
+ if ($2 == NULL) {
+ $$ = list_create($1);
+ (void) list_prepend($$, instruction(Op_push_i));
+ $$->nexti->memory = Nnull_string;
+ } else
+ $$ = list_append($2, $1);
+ }
| LEX_RETURN
- {
- if (! can_return)
+ {
+ if (! can_return)
yyerror(_("`return' used outside function context"));
- }
- opt_exp statement_term
- {
- $$ = node($3 == NULL ? Nnull_string : $3,
- Node_K_return, (NODE *) NULL);
- }
+ } opt_exp statement_term {
+ if ($3 == NULL) {
+ $$ = list_create($1);
+ (void) list_prepend($$, instruction(Op_push_i));
+ $$->nexti->memory = Nnull_string;
+ } else
+ $$ = list_append($3, $1);
+ }
| simple_stmt statement_term
;
@@ -548,136 +821,213 @@ simple_stmt
{
/*
* Optimization: plain `print' has no expression list, so $3 is null.
- * If $3 is an expression list with one element (rnode == null)
- * and lnode is a field spec for field 0, we have `print $0'.
- * For both, use Node_K_print_rec, which is faster for these two cases.
+ * If $3 is NULL or is a bytecode list for $0 use Op_K_print_rec,
+ * which is faster for these two cases.
*/
- if ($1 == Node_K_print &&
- ($3 == NULL
- || ($3->type == Node_expression_list
- && $3->rnode == NULL
- && $3->lnode->type == Node_field_spec
- && $3->lnode->lnode->type == Node_val
- && $3->lnode->lnode->numbr == 0.0))
+
+ if ($1->opcode == Op_K_print &&
+ ($3 == NULL
+ || ($3->lasti->opcode == Op_field_spec
+ && $3->nexti->nexti->nexti == $3->lasti
+ && $3->nexti->nexti->opcode == Op_push_i
+ && $3->nexti->nexti->memory->type == Node_val
+ && $3->nexti->nexti->memory->numbr == 0.0)
+ )
) {
static short warned = FALSE;
+ /* -----------------
+ * output_redir
+ * [ redirect exp ]
+ * -----------------
+ * expression_list
+ * ------------------
+ * [Op_K_print_rec | NULL | redir_type | expr_count]
+ */
- $$ = node(NULL, Node_K_print_rec, $4);
+ if ($3 != NULL) {
+ bcfree($3->lasti); /* Op_field_spec */
+ $3->nexti->nexti->memory->flags &= ~PERM;
+ $3->nexti->nexti->memory->flags |= MALLOC;
+ unref($3->nexti->nexti->memory); /* Node_val */
+ bcfree($3->nexti->nexti); /* Op_push_i */
+ bcfree($3->nexti); /* Op_list */
+ bcfree($3); /* Op_list */
+ } else {
+ if (do_lint && (rule == BEGIN || rule == END) && ! warned) {
+ warned = TRUE;
+ lintwarn(
+ _("plain `print' in BEGIN or END rule should probably be `print \"\"'"));
+ }
+ }
- if (do_lint && $3 == NULL && begin_or_end_rule && ! warned) {
- warned = TRUE;
- lintwarn(
- _("plain `print' in BEGIN or END rule should probably be `print \"\"'"));
+ $1->expr_count = 0;
+ $1->opcode = Op_K_print_rec;
+ if ($4 == NULL) { /* no redirection */
+ $1->redir_type = 0;
+ $$ = list_create($1);
+ } else {
+ INSTRUCTION *ip;
+ ip = $4->nexti;
+ $1->redir_type = ip->redir_type;
+ $4->nexti = ip->nexti;
+ bcfree(ip);
+ $$ = list_append($4, $1);
}
} else {
- $$ = node($3, $1, $4);
- if ($$->type == Node_K_printf)
- count_args($$);
+ /* -----------------
+ * [ output_redir ]
+ * [ redirect exp ]
+ * -----------------
+ * [ expression_list ]
+ * ------------------
+ * [$1 | NULL | redir_type | expr_count]
+ *
+ */
+
+ if ($4 == NULL) { /* no redirection */
+ if ($3 == NULL) { /* printf without arg */
+ $1->expr_count = 0;
+ $1->redir_type = 0;
+ $$ = list_create($1);
+ } else {
+ INSTRUCTION *t = $3;
+ $1->expr_count = count_expressions(&t, FALSE);
+ $1->redir_type = 0;
+ $$ = list_append(t, $1);
+ }
+ } else {
+ INSTRUCTION *ip;
+ ip = $4->nexti;
+ $1->redir_type = ip->redir_type;
+ $4->nexti = ip->nexti;
+ bcfree(ip);
+ if ($3 == NULL) {
+ $1->expr_count = 0;
+ $$ = list_append($4, $1);
+ } else {
+ INSTRUCTION *t = $3;
+ $1->expr_count = count_expressions(&t, FALSE);
+ $$ = list_append(list_merge($4, t), $1);
+ }
+ }
}
}
- | LEX_DELETE NAME '[' expression_list ']'
- { $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, $4); }
- | LEX_DELETE NAME
- {
- static short warned = FALSE;
- if (do_lint && ! warned) {
- warned = TRUE;
- lintwarn(_("`delete array' is a gawk extension"));
- }
- if (do_traditional) {
- /*
- * can't use yyerror, since may have overshot
- * the source line
- */
- errcount++;
- error(_("`delete array' is a gawk extension"));
- }
- $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL);
+ | LEX_DELETE NAME { sub_counter = 0; } delete_subscript_list
+ {
+ char *arr = $2->lextok;
+
+ $2->opcode = Op_push_array;
+ $2->memory = variable(arr, Node_var_array);
+
+ if ($4 == NULL) {
+ static short warned = FALSE;
+ if (do_lint && ! warned) {
+ warned = TRUE;
+ lintwarn(_("`delete array' is a gawk extension"));
+ }
+ if (do_traditional) {
+ /*
+ * can't use yyerror, since may have overshot
+ * the source line
+ */
+ errcount++;
+ error(_("`delete array' is a gawk extension"));
+ }
+ $1->expr_count = 0;
+ $$ = list_append(list_create($2), $1);
+ } else {
+ $1->expr_count = sub_counter;
+ $$ = list_append(list_append($4, $2), $1);
}
+ }
| LEX_DELETE '(' NAME ')'
- {
/*
* this is for tawk compatibility. maybe the warnings
* should always be done.
*/
- static short warned = FALSE;
+ {
+ static short warned = FALSE;
+ char *arr = $3->lextok;
- if (do_lint && ! warned) {
+ if (do_lint && ! warned) {
warned = TRUE;
lintwarn(_("`delete(array)' is a non-portable tawk extension"));
- }
- if (do_traditional) {
+ }
+ if (do_traditional) {
/*
* can't use yyerror, since may have overshot
- * the source line
+ * the source line.
*/
errcount++;
error(_("`delete(array)' is a non-portable tawk extension"));
- }
- $$ = node(variable($3, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL);
}
+ $3->memory = variable(arr, Node_var_array);
+ $3->opcode = Op_push_array;
+ $1->expr_count = 0;
+ $$ = list_append(list_create($3), $1);
+ }
| exp
- { $$ = $1; }
+ { $$ = optimize_assignment($1); }
;
opt_simple_stmt
: /* empty */
- { $$ = NULL; }
+ { $$ = NULL; }
| simple_stmt
- { $$ = $1; }
+ { $$ = $1; }
;
switch_body
: case_statements
- {
- if ($1 == NULL) {
- $$ = NULL;
- } else {
- NODE *dflt = NULL;
- NODE *head = $1;
- NODE *curr;
-
- const char **case_values = NULL;
-
- int maxcount = 128;
- int case_count = 0;
- int i;
-
- emalloc(case_values, const char **, sizeof(char*) * maxcount, "switch_body");
- for (curr = $1; curr != NULL; curr = curr->rnode) {
- /* Assure that case statement values are unique. */
- if (curr->lnode->type == Node_K_case) {
- char *caseval;
-
- if (curr->lnode->lnode->type == Node_regex)
- caseval = curr->lnode->lnode->re_exp->stptr;
- else
- caseval = force_string(tree_eval(curr->lnode->lnode))->stptr;
-
- for (i = 0; i < case_count; i++)
- if (strcmp(caseval, case_values[i]) == 0)
- yyerror(_("duplicate case values in switch body: %s"), caseval);
-
- if (case_count >= maxcount) {
- maxcount += 128;
- erealloc(case_values, const char **, sizeof(char*) * maxcount, "switch_body");
+ {
+ INSTRUCTION *dflt = NULL;
+
+ if ($1 != NULL) {
+ INSTRUCTION *curr;
+ const char **case_values = NULL;
+ int maxcount = 128;
+ int case_count = 0;
+ int i;
+
+ emalloc(case_values, const char **, sizeof(char *) * maxcount, "statement");
+
+ for (curr = $1->case_val->nexti; curr != NULL; curr = curr->nexti) {
+ if (curr->opcode == Op_K_case) {
+ char *caseval;
+ if (curr->memory->type == Node_regex)
+ caseval = curr->memory->re_exp->stptr;
+ else
+ caseval = force_string(curr->memory)->stptr;
+ for (i = 0; i < case_count; i++)
+ if (strcmp(caseval, case_values[i]) == 0)
+ yyerror(_("duplicate case values in switch body: %s"), caseval);
+
+ if (case_count >= maxcount) {
+ maxcount += 128;
+ erealloc(case_values, const char **, sizeof(char*) * maxcount, "statement");
+ }
+ case_values[case_count++] = caseval;
+ } else {
+ /* Otherwise save a pointer to the default node. */
+ if (dflt != NULL)
+ yyerror(_("duplicate `default' detected in switch body"));
+ dflt = curr;
}
- case_values[case_count++] = caseval;
- } else {
- /* Otherwise save a pointer to the default node. */
- if (dflt != NULL)
- yyerror(_("Duplicate `default' detected in switch body"));
- dflt = curr;
}
+
+ efree(case_values);
+ $$ = list_prepend($1->case_stmt, instruction(Op_K_switch));
+ $$->nexti->case_val = $1->case_val->nexti;
+ $$->nexti->switch_dflt = dflt;
+ bcfree($1->case_val); /* Op_list */
+ bcfree($1); /* Op_case_list */
+ } else {
+ $$ = list_create(instruction(Op_K_switch));
+ $$->nexti->case_val = NULL;
+ $$->nexti->switch_dflt = NULL;
}
-
- free(case_values);
-
- /* Create the switch body. */
- $$ = node(head, Node_switch_body, dflt);
}
- }
;
case_statements
@@ -685,20 +1035,15 @@ case_statements
{ $$ = NULL; }
| case_statements case_statement
{
- if ($2 == NULL)
+ if ($1 == NULL) {
+ $2->case_val = list_create($2->case_val);
+ $$ = $2;
+ } else {
+ (void) list_append($1->case_val, $2->case_val);
+ (void) list_merge($1->case_stmt, $2->case_stmt);
+ bcfree($2); /* Op_case_list */
$$ = $1;
- else {
- if (do_lint && isnoeffect($2->type))
- lintwarn(_("statement may have no effect"));
- if ($1 == NULL)
- $$ = node($2, Node_case_list, (NODE *) NULL);
- else
- $$ = append_right(
- ($1->type == Node_case_list ? $1 : node($1, Node_case_list, (NODE *) NULL)),
- ($2->type == Node_case_list ? $2 : node($2, Node_case_list, (NODE *) NULL))
- );
}
- yyerrok;
}
| case_statements error
{ $$ = NULL; }
@@ -706,30 +1051,77 @@ case_statements
case_statement
: LEX_CASE case_value colon opt_nls statements
- { $$ = node($2, Node_K_case, $5); }
+ {
+ INSTRUCTION *casestmt = $5;
+
+ $1->memory = $2->memory;
+ bcfree($2);
+ if ($5 == NULL)
+ casestmt = list_create(instruction(Op_no_op));
+ if (do_profiling)
+ (void) list_prepend(casestmt, instruction(Op_exec_count));
+
+ $1->target_stmt = casestmt->nexti;
+
+ /* recycle $3 as Op_case_list */
+ $3->opcode = Op_case_list;
+ $3->case_val = $1; /* Op_K_case */
+ $3->case_stmt = casestmt; /* Op_list */
+ $$ = $3;
+ }
| LEX_DEFAULT colon opt_nls statements
- { $$ = node((NODE *) NULL, Node_K_default, $4); }
+ {
+ INSTRUCTION *casestmt = $4;
+
+ if ($4 == NULL)
+ casestmt = list_create(instruction(Op_no_op));
+ if (do_profiling)
+ (void) list_prepend(casestmt, instruction(Op_exec_count));
+
+ $1->target_stmt = casestmt->nexti;
+ $2->opcode = Op_case_list;
+ $2->case_val = $1; /* Op_K_default */
+ $2->case_stmt = casestmt; /* Op_list */
+ $$ = $2;
+ }
;
case_value
: YNUMBER
- { $$ = $1; }
- | '-' YNUMBER %prec UNARY
{
- $2->numbr = -(force_number($2));
+ $1->opcode = Op_K_case;
+ $$ = $1;
+ }
+ | '-' YNUMBER %prec UNARY
+ {
+ $2->memory->numbr = -(force_number($2->memory));
+ bcfree($1);
+ $2->opcode = Op_K_case;
$$ = $2;
}
| '+' YNUMBER %prec UNARY
- { $$ = $2; }
- | YSTRING
- { $$ = $1; }
- | regexp
- { $$ = $1; }
+ {
+ bcfree($1);
+ $2->opcode = Op_K_case;
+ $$ = $2;
+ }
+ | YSTRING
+ {
+ $1->opcode = Op_K_case;
+ $$ = $1;
+ }
+ | regexp
+ {
+ $1->opcode = Op_K_case;
+ $$ = $1;
+ }
;
print
: LEX_PRINT
+ { $$ = $1; }
| LEX_PRINTF
+ { $$ = $1; }
;
/*
@@ -738,8 +1130,10 @@ print
*/
print_expression_list
: opt_expression_list
- | '(' exp comma expression_list r_paren
- { $$ = node($2, Node_expression_list, $4); }
+ | '(' expression_list r_paren
+ {
+ $$ = $2;
+ }
;
output_redir
@@ -751,25 +1145,24 @@ output_redir
}
| IO_OUT { in_print = FALSE; in_parens = 0; } common_exp
{
- $$ = node($3, $1, (NODE *) NULL);
- if ($1 == Node_redirect_twoway
- && $3->type == Node_K_getline
- && $3->rnode != NULL
- && $3->rnode->type == Node_redirect_twoway)
+ if ($1->redir_type == redirect_twoway
+ && $3->lasti->opcode == Op_K_getline_redir
+ && $3->lasti->redir_type == redirect_twoway)
yyerror(_("multistage two-way pipelines don't work"));
+ $$ = list_prepend($3, $1);
}
;
if_statement
: LEX_IF '(' exp r_paren opt_nls statement
{
- $$ = node($3, Node_K_if,
- node($6, Node_if_branches, (NODE *) NULL));
+ $$ = mk_condition($3, $1, $6, NULL, NULL);
}
| LEX_IF '(' exp r_paren opt_nls statement
LEX_ELSE opt_nls statement
- { $$ = node($3, Node_K_if,
- node($6, Node_if_branches, $9)); }
+ {
+ $$ = mk_condition($3, $1, $6, $7, $9);
+ }
;
nls
@@ -784,178 +1177,264 @@ opt_nls
input_redir
: /* empty */
- { $$ = NULL; }
+ { $$ = NULL; }
| '<' simp_exp
- { $$ = node($2, Node_redirect_input, (NODE *) NULL); }
+ {
+ bcfree($1);
+ $$ = $2;
+ }
;
opt_param_list
: /* empty */
- { $$ = NULL; }
| param_list
- { $$ = $1; }
;
param_list
: NAME
- { $$ = make_param($1); }
+ {
+ append_param($1->lextok);
+ $1->lextok = NULL;
+ bcfree($1);
+ }
| param_list comma NAME
- { $$ = append_right($1, make_param($3)); yyerrok; }
+ {
+ append_param($3->lextok);
+ $3->lextok = NULL;
+ bcfree($3);
+ yyerrok;
+ }
| error
- { $$ = NULL; }
+ { /* func_params = NULL; */ }
| param_list error
- { $$ = NULL; }
+ { /* func_params = NULL; */ }
| param_list comma error
- { $$ = NULL; }
+ { /* func_params = NULL; */ }
;
/* optional expression, as in for loop */
opt_exp
: /* empty */
- { $$ = NULL; }
+ { $$ = NULL; }
| exp
- { $$ = $1; }
+ { $$ = $1; }
;
opt_expression_list
: /* empty */
- { $$ = NULL; }
+ { $$ = NULL; }
| expression_list
- { $$ = $1; }
+ { $$ = $1; }
;
expression_list
: exp
- { $$ = node($1, Node_expression_list, (NODE *) NULL); }
+ { $$ = mk_expression_list(NULL, $1); }
| expression_list comma exp
- {
- $$ = append_right($1,
- node($3, Node_expression_list, (NODE *) NULL));
- yyerrok;
- }
+ {
+ $$ = mk_expression_list($1, $3);
+ yyerrok;
+ }
| error
- { $$ = NULL; }
+ { $$ = NULL; }
| expression_list error
- { $$ = NULL; }
+ { $$ = NULL; }
| expression_list error exp
- { $$ = NULL; }
+ { $$ = NULL; }
| expression_list comma error
- { $$ = NULL; }
+ { $$ = NULL; }
;
/* Expressions, not including the comma operator. */
-exp : variable assign_operator exp %prec ASSIGNOP
- {
- if (do_lint && $3->type == Node_regex)
+exp
+ : variable assign_operator exp %prec ASSIGNOP
+ {
+ if (do_lint && $3->lasti->opcode == Op_match_rec)
lintwarn(_("regular expression on right of assignment"));
- $$ = optimize_concat($1, $2, $3);
- }
+ $$ = mk_assignment($1, $3, $2);
+ }
| exp LEX_AND exp
- { $$ = node($1, Node_and, $3); }
+ { $$ = mk_boolean($1, $3, $2); }
| exp LEX_OR exp
- { $$ = node($1, Node_or, $3); }
+ { $$ = mk_boolean($1, $3, $2); }
| exp MATCHOP exp
- {
- if ($1->type == Node_regex)
+ {
+ if ($1->lasti->opcode == Op_match_rec)
warning(_("regular expression on left of `~' or `!~' operator"));
- $$ = node($1, $2, mk_rexp($3));
- }
- | exp LEX_IN NAME
- {
- if (do_lint_old)
- warning(_("old awk does not support the keyword `in' except after `for'"));
- $$ = node(variable($3, CAN_FREE, Node_var_array), Node_in_array, $1);
+
+ if ($3->lasti == $3->nexti && $3->nexti->opcode == Op_match_rec) {
+ $2->memory = $3->nexti->memory;
+ bcfree($3->nexti); /* Op_match_rec */
+ bcfree($3); /* Op_list */
+ $$ = list_append($1, $2);
+ } else {
+ $2->memory = make_regnode(Node_dynregex, NULL);
+ $$ = list_append(list_merge($1, $3), $2);
}
+ }
+ | exp LEX_IN simple_variable
+ {
+ if (do_lint_old)
+ warning(_("old awk does not support the keyword `in' except after `for'"));
+ $3->nexti->opcode = Op_push_array;
+ $2->opcode = Op_in_array;
+ $2->expr_count = 1;
+ $$ = list_append(list_merge($1, $3), $2);
+ }
| exp a_relop exp %prec RELOP
- {
- if (do_lint && $3->type == Node_regex)
+ {
+ if (do_lint && $3->lasti->opcode == Op_match_rec)
lintwarn(_("regular expression on right of comparison"));
- $$ = node($1, $2, $3);
- }
+ $$ = list_append(list_merge($1, $3), $2);
+ }
| exp '?' exp ':' exp
- { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));}
+ { $$ = mk_condition($1, $2, $3, $4, $5); }
| common_exp
- { $$ = $1; }
+ { $$ = $1; }
;
assign_operator
: ASSIGN
- { $$ = $1; }
+ { $$ = $1; }
| ASSIGNOP
- { $$ = $1; }
+ { $$ = $1; }
| SLASH_BEFORE_EQUAL ASSIGN /* `/=' */
- { $$ = Node_assign_quotient; }
+ {
+ $2->opcode = Op_assign_quotient;
+ $$ = $2;
+ }
;
relop_or_less
: RELOP
- { $$ = $1; }
+ { $$ = $1; }
| '<'
- { $$ = Node_less; }
+ { $$ = $1; }
;
+
a_relop
: relop_or_less
+ { $$ = $1; }
| '>'
- { $$ = Node_greater; }
+ { $$ = $1; }
;
common_exp
: simp_exp
- { $$ = $1; }
+ { $$ = $1; }
| simp_exp_nc
- { $$ = $1; }
+ { $$ = $1; }
| common_exp simp_exp %prec CONCAT_OP
- { $$ = constant_fold($1, Node_concat, $2); }
+ {
+ int count = 2;
+ int is_simple_var = FALSE;
+ INSTRUCTION *ip1, *ip2;
+
+ if ($1->lasti->opcode == Op_concat) {
+ /* multiple (> 2) adjacent strings optimization */
+ is_simple_var = ($1->lasti->concat_flag & CSVAR);
+ count = $1->lasti->expr_count + 1;
+ $1->lasti->opcode = Op_no_op;
+ } else {
+ is_simple_var = ($1->nexti->opcode == Op_push
+ && $1->lasti == $1->nexti); /* first exp. is a simple
+ * variable?; kludge for use
+ * in Op_assign_concat.
+ */
+ }
+ ip1 = $1->nexti;
+ ip2 = $2->nexti;
+ if (ip1->memory != NULL && ip1->memory->type == Node_val && ip1 == $1->lasti
+ && ip2->memory != NULL && ip2->memory->type == Node_val && ip2 == $2->lasti && do_optimize > 1){
+ size_t nlen;
+
+ ip1->memory = force_string(ip1->memory);
+ ip2->memory = force_string(ip2->memory);
+ nlen = ip1->memory->stlen + ip2->memory->stlen;
+ erealloc(ip1->memory->stptr, char *, nlen + 2, "constant fold");
+ memcpy(ip1->memory->stptr + ip1->memory->stlen, ip2->memory->stptr, ip2->memory->stlen);
+ ip1->memory->stlen = nlen;
+ ip1->memory->stptr[nlen] = '\0';
+ ip1->memory->flags &= ~(NUMCUR|NUMBER);
+ ip1->memory->flags |= (STRING|STRCUR);
+ bcfree($2);
+ bcfree(ip2);
+ $1->opcode = Op_push_i;
+ $$ = $1;
+ } else {
+ $$ = list_append(list_merge($1, $2), instruction(Op_concat));
+ $$->lasti->concat_flag = (is_simple_var ? CSVAR : 0);
+ $$->lasti->expr_count = count;
+ if (count > max_args)
+ max_args = count;
+ }
+ }
;
simp_exp
: non_post_simp_exp
/* Binary operators in order of decreasing precedence. */
| simp_exp '^' simp_exp
- { $$ = constant_fold($1, Node_exp, $3); }
+ { $$ = mk_binary($1, $3, $2); }
| simp_exp '*' simp_exp
- { $$ = constant_fold($1, Node_times, $3); }
+ { $$ = mk_binary($1, $3, $2); }
| simp_exp '/' simp_exp
- { $$ = constant_fold($1, Node_quotient, $3); }
+ { $$ = mk_binary($1, $3, $2); }
| simp_exp '%' simp_exp
- { $$ = constant_fold($1, Node_mod, $3); }
+ { $$ = mk_binary($1, $3, $2); }
| simp_exp '+' simp_exp
- { $$ = constant_fold($1, Node_plus, $3); }
+ { $$ = mk_binary($1, $3, $2); }
| simp_exp '-' simp_exp
- { $$ = constant_fold($1, Node_minus, $3); }
+ { $$ = mk_binary($1, $3, $2); }
| LEX_GETLINE opt_variable input_redir
- {
- /*
- * In BEGINFILE/ENDFILE, allow `getline var < file'
- */
- if (beginfile_or_endfile_rule) {
- if ($2 != NULL && $3 != NULL)
- ; /* all ok */
- else {
- if ($2 != NULL)
- fatal(_("`getline var' invalid inside %s rule"),
- parsing_endfile_rule ? "ENDFILE" : "BEGINFILE");
- else
- fatal(_("`getline' invalid inside %s rule"),
- parsing_endfile_rule ? "ENDFILE" : "BEGINFILE");
- }
- }
- if (do_lint && parsing_end_rule && $3 == NULL)
- lintwarn(_("non-redirected `getline' undefined inside END action"));
- $$ = node($2, Node_K_getline, $3);
+ {
+ /*
+ * In BEGINFILE/ENDFILE, allow `getline var < file'
+ */
+ if (rule == BEGINFILE || rule == ENDFILE) {
+ if ($2 != NULL && $3 != NULL)
+ ; /* all ok */
+ else {
+ if ($2 != NULL)
+ yyerror(_("`getline var' invalid inside `%s' rule"), ruletab[rule]);
+ else
+ yyerror(_("`getline' invalid inside `%s' rule"), ruletab[rule]);
+ YYABORT;
+ }
}
+
+ if (do_lint && rule == END && $3 == NULL)
+ lintwarn(_("non-redirected `getline' undefined inside END action"));
+ $$ = mk_getline($1, $2, $3, redirect_input);
+ }
| variable INCREMENT
- { $$ = node($1, Node_postincrement, (NODE *) NULL); }
+ {
+ $2->opcode = Op_postincrement;
+ $$ = mk_assignment($1, NULL, $2);
+ }
| variable DECREMENT
- { $$ = node($1, Node_postdecrement, (NODE *) NULL); }
- | '(' expression_list r_paren LEX_IN NAME
- {
- if (do_lint_old) {
+ {
+ $2->opcode = Op_postdecrement;
+ $$ = mk_assignment($1, NULL, $2);
+ }
+ | '(' expression_list r_paren LEX_IN simple_variable
+ {
+ if (do_lint_old) {
warning(_("old awk does not support the keyword `in' except after `for'"));
warning(_("old awk does not support multidimensional arrays"));
- }
- $$ = node(variable($5, CAN_FREE, Node_var_array), Node_in_array, $2);
}
+ $5->nexti->opcode = Op_push_array;
+ $4->opcode = Op_in_array;
+ if ($2 == NULL) { /* error */
+ errcount++;
+ $4->expr_count = 0;
+ $$ = list_merge($5, $4);
+ } else {
+ INSTRUCTION *t = $2;
+ $4->expr_count = count_expressions(&t, FALSE);
+ $$ = list_append(list_merge(t, $5), $4);
+ }
+ }
;
/* Expressions containing "| getline" lose the ability to be on the
@@ -963,161 +1442,320 @@ simp_exp
simp_exp_nc
: common_exp IO_IN LEX_GETLINE opt_variable
{
- $$ = node($4, Node_K_getline,
- node($1, $2, (NODE *) NULL));
+ $$ = mk_getline($3, $4, $1, $2->redir_type);
+ bcfree($2);
}
+ /* Binary operators in order of decreasing precedence. */
| simp_exp_nc '^' simp_exp
- { $$ = node($1, Node_exp, $3); }
+ { $$ = mk_binary($1, $3, $2); }
| simp_exp_nc '*' simp_exp
- { $$ = node($1, Node_times, $3); }
+ { $$ = mk_binary($1, $3, $2); }
| simp_exp_nc '/' simp_exp
- { $$ = node($1, Node_quotient, $3); }
+ { $$ = mk_binary($1, $3, $2); }
| simp_exp_nc '%' simp_exp
- { $$ = node($1, Node_mod, $3); }
+ { $$ = mk_binary($1, $3, $2); }
| simp_exp_nc '+' simp_exp
- { $$ = node($1, Node_plus, $3); }
+ { $$ = mk_binary($1, $3, $2); }
| simp_exp_nc '-' simp_exp
- { $$ = node($1, Node_minus, $3); }
+ { $$ = mk_binary($1, $3, $2); }
;
non_post_simp_exp
: regexp
- { $$ = $1; }
+ {
+ $$ = list_create($1);
+ }
| '!' simp_exp %prec UNARY
- { $$ = constant_fold($2, Node_not, (NODE *) NULL); }
+ {
+ if ($2->opcode == Op_match_rec) {
+ $2->opcode = Op_nomatch;
+ $1->opcode = Op_push_i;
+ $1->memory = mk_number(0.0, (PERM|NUMCUR|NUMBER));
+ $$ = list_append(list_append(list_create($1),
+ instruction(Op_field_spec)), $2);
+ } else {
+ INSTRUCTION *ip;
+ ip = $2->nexti;
+ if (ip->memory->type == Node_val && $2->lasti == ip && do_optimize > 1) {
+ NODE *ret;
+ if ((ip->memory->flags & (STRCUR|STRING)) != 0) {
+ if (ip->memory->stlen == 0) {
+ ret = make_number((AWKNUM) 1.0);
+ } else {
+ ret = make_number((AWKNUM) 0.0);
+ }
+ } else {
+ if (ip->memory->numbr == 0) {
+ ret = make_number((AWKNUM) 1.0);
+ } else {
+ ret = make_number((AWKNUM) 0.0);
+ }
+ }
+ ret->flags &= ~MALLOC;
+ ret->flags |= PERM;
+ $1->memory = ret;
+ $1->opcode = Op_push_i;
+ bcfree(ip);
+ bcfree($2);
+ $$ = list_create($1);
+ } else {
+ $1->opcode = Op_not;
+ add_lint($2, LINT_assign_in_cond);
+ $$ = list_append($2, $1);
+ }
+ }
+ }
| '(' exp r_paren
- { $$ = $2; }
- | LEX_BUILTIN
- '(' opt_expression_list r_paren
- { $$ = snode($3, Node_builtin, (int) $1); }
+ { $$ = $2; }
+ | LEX_BUILTIN '(' opt_expression_list r_paren
+ {
+ $$ = snode($3, $1);
+ if ($$ == NULL)
+ YYABORT;
+ }
| LEX_LENGTH '(' opt_expression_list r_paren
- { $$ = snode($3, Node_builtin, (int) $1); }
+ {
+ $$ = snode($3, $1);
+ if ($$ == NULL)
+ YYABORT;
+ }
| LEX_LENGTH
{
- static short warned1 = FALSE, warned2 = FALSE;
+ static short warned1 = FALSE;
if (do_lint && ! warned1) {
warned1 = TRUE;
lintwarn(_("call of `length' without parentheses is not portable"));
}
- $$ = snode((NODE *) NULL, Node_builtin, (int) $1);
- if (do_posix && ! warned2) {
- warned2 = TRUE;
- warning(_("call of `length' without parentheses is deprecated by POSIX"));
- }
+ $$ = snode(NULL, $1);
+ if ($$ == NULL)
+ YYABORT;
}
- | function_call
+ | func_call
| variable
| INCREMENT variable
- { $$ = node($2, Node_preincrement, (NODE *) NULL); }
+ {
+ $1->opcode = Op_preincrement;
+ $$ = mk_assignment($2, NULL, $1);
+ }
| DECREMENT variable
- { $$ = node($2, Node_predecrement, (NODE *) NULL); }
+ {
+ $1->opcode = Op_predecrement;
+ $$ = mk_assignment($2, NULL, $1);
+ }
| YNUMBER
- { $$ = $1; }
+ {
+ $$ = list_create($1);
+ }
| YSTRING
- { $$ = $1; }
-
+ {
+ $$ = list_create($1);
+ }
| '-' simp_exp %prec UNARY
- {
- if ($2->type == Node_val && ($2->flags & (STRCUR|STRING)) == 0) {
- $2->numbr = -(force_number($2));
+ {
+ if ($2->lasti->opcode == Op_push_i
+ && ($2->lasti->memory->flags & (STRCUR|STRING)) == 0) {
+ $2->lasti->memory->numbr = -(force_number($2->lasti->memory));
$$ = $2;
- } else
- $$ = node($2, Node_unary_minus, (NODE *) NULL);
+ bcfree($1);
+ } else {
+ $1->opcode = Op_unary_minus;
+ $$ = list_append($2, $1);
}
+ }
| '+' simp_exp %prec UNARY
- {
- /*
- * was: $$ = $2
- * POSIX semantics: force a conversion to numeric type
- */
- $$ = node (make_number(0.0), Node_plus, $2);
- }
+ {
+ /*
+ * was: $$ = $2
+ * POSIX semantics: force a conversion to numeric type
+ */
+ $1->opcode = Op_plus_i;
+ $1->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
+ $$ = list_append($2, $1);
+ }
;
-function_call
- : direct_function_call
+func_call
+ : direct_func_call
{
- func_use($1->rnode->stptr, FUNC_USE);
+ func_use($1->lasti->func_name, FUNC_USE);
$$ = $1;
}
- | '@' direct_function_call
+ | '@' direct_func_call
{
/* indirect function call */
+ INSTRUCTION *f, *t;
+ char *name;
+ NODE *indirect_var;
static short warned = FALSE;
+ const char *msg = _("indirect function calls are a gawk extension");
- if (do_lint && ! warned) {
+ if (do_traditional || do_posix)
+ yyerror("%s", msg);
+ else if (do_lint && ! warned) {
warned = TRUE;
- lintwarn(_("indirect function calls are a gawk extension"));
+ lintwarn("%s", msg);
}
+
+ f = $2->lasti;
+ f->opcode = Op_indirect_func_call;
+ name = estrdup(f->func_name, strlen(f->func_name));
+ indirect_var = variable(name, Node_var_new);
+ if (is_std_var(name))
+ yyerror(_("can not use special variable `%s' for indirect function call"), name);
+ t = instruction(Op_push);
+ t->memory = indirect_var;
+
+ /* prepend indirect var instead of appending to arguments (opt_expression_list),
+ * and pop it off in setup_frame (eval.c) (left to right evaluation order); Test case:
+ * f = "fun"
+ * @f(f="real_fun")
+ */
- $$ = $2;
- $$->type = Node_indirect_func_call;
+ $$ = list_prepend($2, t);
}
;
-direct_function_call
+direct_func_call
: FUNC_CALL '(' opt_expression_list r_paren
{
- $$ = node($3, Node_func_call, make_string($1, strlen($1)));
- $$->funcbody = NULL;
param_sanity($3);
- free($1);
+ $1->opcode = Op_func_call;
+ $1->func_body = NULL;
+ if ($3 == NULL) { /* no argument or error */
+ ($1 + 1)->expr_count = 0;
+ $$ = list_create($1);
+ } else {
+ INSTRUCTION *t = $3;
+ ($1 + 1)->expr_count = count_expressions(&t, TRUE);
+ $$ = list_append(t, $1);
+ }
}
;
opt_variable
: /* empty */
- { $$ = NULL; }
+ { $$ = NULL; }
| variable
- { $$ = $1; }
+ { $$ = $1; }
;
-variable
+delete_subscript_list
+ : /* empty */
+ { $$ = NULL; }
+ | delete_subscript SUBSCRIPT
+ { $$ = $1; }
+ ;
+
+delete_subscript
+ : delete_exp_list
+ { $$ = $1; }
+ | delete_subscript delete_exp_list
+ {
+ $$ = list_merge($1, $2);
+ }
+ ;
+
+delete_exp_list
+ : bracketed_exp_list
+ {
+ INSTRUCTION *ip = $1->lasti;
+ int count = ip->sub_count; /* # of SUBSEP-seperated expressions */
+ if (count > 1) {
+ /* change Op_subscript or Op_sub_array to Op_concat */
+ ip->opcode = Op_concat;
+ ip->concat_flag = CSUBSEP;
+ ip->expr_count = count;
+ } else
+ ip->opcode = Op_no_op;
+ sub_counter++; /* count # of dimensions */
+ $$ = $1;
+ }
+ ;
+
+bracketed_exp_list
+ : '[' expression_list ']'
+ {
+ INSTRUCTION *t = $2;
+ if ($2 == NULL) {
+ errcount++;
+ error(_("invalid subscript expression"));
+ /* install Null string as subscript. */
+ t = list_create(instruction(Op_push_i));
+ t->nexti->memory = Nnull_string;
+ $3->sub_count = 1;
+ } else
+ $3->sub_count = count_expressions(&t, FALSE);
+ $$ = list_append(t, $3);
+ }
+ ;
+
+subscript
+ : bracketed_exp_list
+ { $$ = $1; }
+ | subscript bracketed_exp_list
+ {
+ $$ = list_merge($1, $2);
+ }
+ ;
+
+subscript_list
+ : subscript SUBSCRIPT
+ { $$ = $1; }
+ ;
+
+simple_variable
: NAME
- { $$ = variable($1, CAN_FREE, Node_var_new); }
- | NAME '[' expression_list ']'
+ {
+ char *var_name = $1->lextok;
+
+ $1->opcode = Op_push;
+ $1->memory = variable(var_name, Node_var_new);
+ $$ = list_create($1);
+ }
+ | NAME subscript_list
{
NODE *n;
- if ((n = lookup($1)) != NULL && ! isarray(n)) {
+ char *arr = $1->lextok;
+ if ((n = lookup(arr)) != NULL && ! isarray(n))
yyerror(_("use of non-array as array"));
- $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3);
- } else if ($3 == NULL) {
- fatal(_("invalid subscript expression"));
- } else if ($3->rnode == NULL) {
- $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3->lnode);
- freenode($3);
- } else
- $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3);
+ $1->memory = variable(arr, Node_var_array);
+ $1->opcode = Op_push_array;
+ $$ = list_prepend($2, $1);
}
- | field_spec { $$ = $1; }
-/*
-#if 0
- | lex_builtin
- { fatal(_("can't use built-in function `%s' as a variable"), tokstart); }
-#endif
-*/
;
-field_spec
- : '$' non_post_simp_exp opt_incdec
+variable
+ : simple_variable
{
- NODE *n = node($2, Node_field_spec, (NODE *) NULL);
- if ($3 != NULL) {
- if ($3[0] == '+')
- $$ = node(n, Node_postincrement, (NODE *) NULL);
- else
- $$ = node(n, Node_postdecrement, (NODE *) NULL);
- } else {
- $$ = n;
- }
+ INSTRUCTION *ip = $1->nexti;
+ if (ip->opcode == Op_push
+ && ip->memory->type == Node_var
+ && ip->memory->var_update
+ ) {
+ $$ = list_prepend($1, instruction(Op_var_update));
+ $$->nexti->memory = ip->memory;
+ } else
+ $$ = $1;
+ }
+ | '$' non_post_simp_exp opt_incdec
+ {
+ $$ = list_append($2, $1);
+ if ($3 != NULL)
+ mk_assignment($2, NULL, $3);
}
;
opt_incdec
- : INCREMENT { $$ = "+"; }
- | DECREMENT { $$ = "-"; }
+ : INCREMENT
+ {
+ $1->opcode = Op_postincrement;
+ }
+ | DECREMENT
+ {
+ $1->opcode = Op_postdecrement;
+ }
| /* empty */ { $$ = NULL; }
;
@@ -1143,19 +1781,19 @@ semi
;
colon
- : ':' { yyerrok; }
+ : ':' { $$ = $1; yyerrok; }
;
-comma : ',' opt_nls { yyerrok; }
+comma
+ : ',' opt_nls { yyerrok; }
;
-
%%
struct token {
- const char *operator; /* text to match */
- NODETYPE value; /* node type */
- int class; /* lexical class */
- unsigned flags; /* # of args. allowed and compatability */
+ const char *operator; /* text to match */
+ OPCODE value; /* type */
+ int class; /* lexical class */
+ unsigned flags; /* # of args. allowed and compatability */
# define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination */
# define A(n) (1<<(n))
# define VERSION_MASK 0xFF00 /* old awk is zero */
@@ -1163,7 +1801,9 @@ struct token {
# define NOT_POSIX 0x0200 /* feature not in POSIX */
# define GAWKX 0x0400 /* gawk extension */
# define RESX 0x0800 /* Bell Labs Research extension */
- NODE *(*ptr) P((NODE *)); /* function that implements this keyword */
+# define BREAK 0x1000 /* break allowed inside */
+# define CONTINUE 0x2000 /* continue allowed inside */
+ NODE *(*ptr)(int); /* function that implements this keyword */
};
#if 'a' == 0x81 /* it's EBCDIC */
@@ -1188,77 +1828,79 @@ tokcompare(void *l, void *r)
*/
static const struct token tokentab[] = {
-{"BEGIN", Node_illegal, LEX_BEGIN, 0, 0},
-{"BEGINFILE", Node_illegal, LEX_BEGINFILE, GAWKX, 0},
-{"END", Node_illegal, LEX_END, 0, 0},
-{"ENDFILE", Node_illegal, LEX_ENDFILE, GAWKX, 0},
+{"BEGIN", Op_rule, LEX_BEGIN, 0, 0},
+{"BEGINFILE", Op_rule, LEX_BEGINFILE, GAWKX, 0},
+{"END", Op_rule, LEX_END, 0, 0},
+{"ENDFILE", Op_rule, LEX_ENDFILE, GAWKX, 0},
#ifdef ARRAYDEBUG
-{"adump", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_adump},
+{"adump", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_adump},
#endif
-{"and", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_and},
-{"asort", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asort},
-{"asorti", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asorti},
-{"atan2", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2},
-{"bindtextdomain", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_bindtextdomain},
-{"break", Node_K_break, LEX_BREAK, 0, 0},
-{"case", Node_K_case, LEX_CASE, GAWKX, 0},
-{"close", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1)|A(2), do_close},
-{"compl", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_compl},
-{"continue", Node_K_continue, LEX_CONTINUE, 0, 0},
-{"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos},
-{"dcgettext", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_dcgettext},
-{"dcngettext", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext},
-{"default", Node_K_default, LEX_DEFAULT, GAWKX, 0},
-{"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0},
-{"do", Node_K_do, LEX_DO, NOT_OLD, 0},
-{"else", Node_illegal, LEX_ELSE, 0, 0},
-{"exit", Node_K_exit, LEX_EXIT, 0, 0},
-{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp},
-{"extension", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_ext},
-{"fflush", Node_builtin, LEX_BUILTIN, RESX|A(0)|A(1), do_fflush},
-{"for", Node_K_for, LEX_FOR, 0, 0},
-{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
-{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0},
-{"gensub", Node_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub},
-{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0},
-{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
-{"if", Node_K_if, LEX_IF, 0, 0},
-{"in", Node_illegal, LEX_IN, 0, 0},
-{"index", Node_builtin, LEX_BUILTIN, A(2), do_index},
-{"int", Node_builtin, LEX_BUILTIN, A(1), do_int},
-{"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length},
-{"log", Node_builtin, LEX_BUILTIN, A(1), do_log},
-{"lshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_lshift},
-{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_match},
-{"mktime", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_mktime},
-{"next", Node_K_next, LEX_NEXT, 0, 0},
-{"nextfile", Node_K_nextfile, LEX_NEXTFILE, GAWKX, 0},
-{"or", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_or},
-{"patsplit", Node_builtin, LEX_BUILTIN, GAWKX|A(2)|A(3)|A(4), do_patsplit},
-{"print", Node_K_print, LEX_PRINT, 0, 0},
-{"printf", Node_K_printf, LEX_PRINTF, 0, 0},
-{"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand},
-{"return", Node_K_return, LEX_RETURN, NOT_OLD, 0},
-{"rshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_rshift},
-{"sin", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin},
-{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3)|A(4), do_split},
-{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf},
-{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt},
-{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand},
+{"and", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_and},
+{"asort", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asort},
+{"asorti", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asorti},
+{"atan2", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2},
+{"bindtextdomain", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_bindtextdomain},
+{"break", Op_K_break, LEX_BREAK, 0, 0},
+{"case", Op_K_case, LEX_CASE, GAWKX, 0},
+{"close", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1)|A(2), do_close},
+{"compl", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_compl},
+{"continue", Op_K_continue, LEX_CONTINUE, 0, 0},
+{"cos", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos},
+{"dcgettext", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_dcgettext},
+{"dcngettext", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext},
+{"default", Op_K_default, LEX_DEFAULT, GAWKX, 0},
+{"delete", Op_K_delete, LEX_DELETE, NOT_OLD, 0},
+{"do", Op_symbol, LEX_DO, NOT_OLD|BREAK|CONTINUE, 0},
+{"else", Op_K_else, LEX_ELSE, 0, 0},
+{"eval", Op_symbol, LEX_EVAL, 0, 0},
+{"exit", Op_K_exit, LEX_EXIT, 0, 0},
+{"exp", Op_builtin, LEX_BUILTIN, A(1), do_exp},
+{"extension", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_ext},
+{"fflush", Op_builtin, LEX_BUILTIN, RESX|A(0)|A(1), do_fflush},
+{"for", Op_symbol, LEX_FOR, BREAK|CONTINUE, 0},
+{"func", Op_func, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
+{"function",Op_func, LEX_FUNCTION, NOT_OLD, 0},
+{"gensub", Op_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub},
+{"getline", Op_K_getline_redir, LEX_GETLINE, NOT_OLD, 0},
+{"gsub", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
+{"if", Op_K_if, LEX_IF, 0, 0},
+{"in", Op_symbol, LEX_IN, 0, 0},
+{"include", Op_symbol, LEX_INCLUDE, GAWKX, 0},
+{"index", Op_builtin, LEX_BUILTIN, A(2), do_index},
+{"int", Op_builtin, LEX_BUILTIN, A(1), do_int},
+{"length", Op_builtin, LEX_LENGTH, A(0)|A(1), do_length},
+{"log", Op_builtin, LEX_BUILTIN, A(1), do_log},
+{"lshift", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_lshift},
+{"match", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_match},
+{"mktime", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_mktime},
+{"next", Op_K_next, LEX_NEXT, 0, 0},
+{"nextfile", Op_K_nextfile, LEX_NEXTFILE, GAWKX, 0},
+{"or", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_or},
+{"patsplit", Op_builtin, LEX_BUILTIN, GAWKX|A(2)|A(3)|A(4), do_patsplit},
+{"print", Op_K_print, LEX_PRINT, 0, 0},
+{"printf", Op_K_printf, LEX_PRINTF, 0, 0},
+{"rand", Op_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand},
+{"return", Op_K_return, LEX_RETURN, NOT_OLD, 0},
+{"rshift", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_rshift},
+{"sin", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin},
+{"split", Op_builtin, LEX_BUILTIN, A(2)|A(3)|A(4), do_split},
+{"sprintf", Op_builtin, LEX_BUILTIN, 0, do_sprintf},
+{"sqrt", Op_builtin, LEX_BUILTIN, A(1), do_sqrt},
+{"srand", Op_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand},
#if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */
-{"stopme", Node_builtin, LEX_BUILTIN, GAWKX|A(0), stopme},
+{"stopme", Op_builtin, LEX_BUILTIN, GAWKX|A(0), stopme},
#endif
-{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime},
-{"strtonum", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum},
-{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
-{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
-{"switch", Node_K_switch, LEX_SWITCH, GAWKX, 0},
-{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
-{"systime", Node_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime},
-{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower},
-{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper},
-{"while", Node_K_while, LEX_WHILE, 0, 0},
-{"xor", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_xor},
+{"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime},
+{"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum},
+{"sub", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
+{"substr", Op_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
+{"switch", Op_symbol, LEX_SWITCH, GAWKX|BREAK, 0},
+{"system", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
+{"systime", Op_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime},
+{"tolower", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower},
+{"toupper", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper},
+{"while", Op_symbol, LEX_WHILE, BREAK|CONTINUE, 0},
+{"xor", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_xor},
};
#ifdef MBS_SUPPORT
@@ -1281,9 +1923,9 @@ static int cur_ring_idx;
/* getfname --- return name of a builtin function (for pretty printing) */
const char *
-getfname(register NODE *(*fptr)(NODE *))
+getfname(NODE *(*fptr)(int))
{
- register int i, j;
+ int i, j;
j = sizeof(tokentab) / sizeof(tokentab[0]);
/* linear search, no other way to do it */
@@ -1302,22 +1944,44 @@ getfname(register NODE *(*fptr)(NODE *))
*/
static void
-#ifdef CAN_USE_STDARG_H
- yyerror(const char *m, ...)
-#else
-/* VARARGS0 */
- yyerror(va_alist)
- va_dcl
-#endif
+yyerror(const char *m, ...)
{
va_list args;
const char *mesg = NULL;
- register char *bp, *cp;
+ char *bp, *cp;
char *scan;
char *buf;
int count;
static char end_of_file_line[] = "(END OF FILE)";
char save;
+ int saveline;
+ SRCFILE *s;
+
+ /* suppress current file name, line # from `.. included from ..' msgs */
+ saveline = sourceline;
+ sourceline = 0;
+
+ for (s = sourcefile; s->stype == SRC_INC; ) {
+ int line;
+ s = s->next;
+ if (s->fd <= INVALID_HANDLE)
+ continue;
+
+ line = s->srclines;
+ /* if last token is NEWLINE, line number is off by 1. */
+ if (s->lasttok == NEWLINE)
+ line--;
+
+ msg("%s %s:%d%c",
+ s->prev == sourcefile ? "In file included from"
+ : " from",
+ (s->stype == SRC_INC ||
+ s->stype == SRC_FILE) ? s->src : "cmd. line",
+ line,
+ s->stype == SRC_INC ? ',' : ':'
+ );
+ }
+ sourceline = saveline;
errcount++;
/* Find the current line in the input file */
@@ -1357,15 +2021,9 @@ static void
*bp = save;
-#ifdef CAN_USE_STDARG_H
va_start(args, m);
if (mesg == NULL)
mesg = m;
-#else
- va_start(args);
- if (mesg == NULL)
- mesg = va_arg(args, char *);
-#endif
count = (bp - thisline) + strlen(mesg) + 2 + 1;
emalloc(buf, char *, count, "yyerror");
@@ -1384,7 +2042,333 @@ static void
strcpy(bp, mesg);
err("", buf, args);
va_end(args);
- free(buf);
+ efree(buf);
+}
+
+/* mk_program --- create a single list of instructions
+ *
+ * Combines the per-rule instruction lists kept in rule_block[] (BEGIN,
+ * Rule, END, BEGINFILE, ENDFILE) with the pre-created ip_* instructions
+ * and returns the first instruction of the combined list. The list head
+ * itself is released with bcfree() before returning.
+ *
+ * Three layouts are produced:
+ * - context level > 0: BEGIN, rules and END only, followed by Op_stop;
+ * - no rule block and no user END/BEGINFILE/ENDFILE code: BEGIN and END,
+ * ip_atexit, Op_stop, then the BEGINFILE and ENDFILE lists placed
+ * after Op_stop;
+ * - otherwise: BEGIN, ip_newfile, BEGINFILE, ip_rec + rules + Op_jmp
+ * back to ip_rec, ENDFILE, END, ip_atexit, Op_stop.
+ * The orders above assume list_merge(a, b) appends b to a and that
+ * list_prepend/list_append add at the front/back -- TODO confirm
+ * against their definitions, which are not part of this function.
+ */
+
+static INSTRUCTION *
+mk_program()
+{
+ INSTRUCTION *cp, *tmp;
+
+#define begin_block rule_block[BEGIN]
+#define end_block rule_block[END]
+#define prog_block rule_block[Rule]
+#define beginfile_block rule_block[BEGINFILE]
+#define endfile_block rule_block[ENDFILE]
+
+ if (end_block == NULL)
+ end_block = list_create(ip_end); /* END list always exists from here on and contains ip_end */
+ else
+ (void) list_prepend(end_block, ip_end);
+
+ if (get_context()->level > 0) { /* this branch uses only the BEGIN, rule and END lists */
+ if (begin_block != NULL && prog_block != NULL)
+ cp = list_merge(begin_block, prog_block);
+ else
+ cp = (begin_block != NULL) ? begin_block : prog_block;
+
+ if (cp != NULL)
+ (void) list_merge(cp, end_block);
+ else
+ cp = end_block;
+
+ (void) list_append(cp, instruction(Op_stop));
+ goto out;
+ }
+
+ if (endfile_block == NULL)
+ endfile_block = list_create(ip_endfile);
+ else {
+ extern int has_endfile; /* kludge for use in inrec (io.c) */
+ has_endfile = TRUE; /* set only when the program supplied an ENDFILE list */
+ (void) list_prepend(endfile_block, ip_endfile);
+ }
+
+ if (beginfile_block == NULL)
+ beginfile_block = list_create(ip_beginfile);
+ else
+ (void) list_prepend(beginfile_block, ip_beginfile);
+
+ if (prog_block == NULL) {
+ if (end_block->nexti == end_block->lasti /* presumably: list holds only the ip_* marker added above -- confirm */
+ && beginfile_block->nexti == beginfile_block->lasti
+ && endfile_block->nexti == endfile_block->lasti
+ ) {
+ /* no pattern-action and (real) end, beginfile or endfile blocks */
+ bcfree(ip_rec); /* this layout does not use ip_rec or ip_newfile */
+ bcfree(ip_newfile);
+ ip_rec = ip_newfile = NULL;
+
+ list_append(beginfile_block, instruction(Op_after_beginfile));
+ (void) list_append(endfile_block, instruction(Op_after_endfile));
+
+ if (begin_block == NULL) /* no program at all */
+ cp = end_block;
+ else
+ cp = list_merge(begin_block, end_block);
+ (void) list_append(cp, ip_atexit);
+ (void) list_append(cp, instruction(Op_stop));
+
+ /* append beginfile_block and endfile_block for sole use
+ * in getline without redirection (Op_K_getline).
+ */
+
+ (void) list_merge(cp, beginfile_block);
+ (void) list_merge(cp, endfile_block);
+
+ goto out;
+
+ } else {
+ /* install a do-nothing prog block */
+ prog_block = list_create(instruction(Op_no_op));
+ }
+ }
+
+ (void) list_append(endfile_block, instruction(Op_after_endfile));
+ (void) list_prepend(prog_block, ip_rec);
+ (void) list_append(prog_block, instruction(Op_jmp));
+ prog_block->lasti->target_jmp = ip_rec; /* the appended Op_jmp targets ip_rec, placed at the front of the rule list */
+
+ list_append(beginfile_block, instruction(Op_after_beginfile));
+
+ cp = list_merge(beginfile_block, prog_block);
+ (void) list_prepend(cp, ip_newfile);
+ (void) list_merge(cp, endfile_block);
+ (void) list_merge(cp, end_block);
+ if (begin_block != NULL)
+ cp = list_merge(begin_block, cp);
+
+ (void) list_append(cp, ip_atexit);
+ (void) list_append(cp, instruction(Op_stop));
+
+out:
+ /* delete the Op_list, not needed */
+ tmp = cp->nexti; /* save the successor before the head is freed */
+ bcfree(cp);
+ return tmp;
+
+#undef begin_block
+#undef end_block
+#undef prog_block
+#undef beginfile_block
+#undef endfile_block
+}
+
+/* parse_program --- read in the program and convert into a list of instructions
+ *
+ * Creates the ip_* jump-target instructions, resets the scanner state
+ * (sourcefile, lexeof, lexptr, lasttok, rule_block, errcount, tok), runs
+ * yyparse() and stores the result of mk_program() in *pcode. mk_program()
+ * is called whether or not yyparse() reported failure. Afterwards source
+ * and sourceline are cleared and check_funcs() is called.
+ *
+ * Returns non-zero if yyparse() returned non-zero or errcount is non-zero,
+ * zero otherwise.
+ */
+
+int
+parse_program(INSTRUCTION **pcode)
+{
+ int ret;
+
+ /* pre-create non-local jump targets
+ * ip_end (Op_no_op) -- used as jump target for `exit'
+ * outside an END block.
+ */
+ ip_end = instruction(Op_no_op);
+
+ if (get_context()->level > 0) /* at context level > 0 only ip_end is created */
+ ip_newfile = ip_rec = ip_atexit = ip_beginfile = ip_endfile = NULL;
+ else {
+ ip_endfile = instruction(Op_no_op);
+ ip_beginfile = instruction(Op_no_op);
+ ip_newfile = instruction(Op_newfile); /* target for `nextfile' */
+ ip_newfile->target_jmp = ip_end;
+ ip_newfile->target_endfile = ip_endfile;
+ ip_rec = instruction(Op_get_record); /* target for `next' */
+ ip_atexit = instruction(Op_atexit); /* target for `exit' in END block */
+ }
+
+ sourcefile = srcfiles->next; /* start scanning at the entry following srcfiles */
+ lexeof = FALSE;
+ lexptr = NULL;
+ lasttok = 0;
+ memset(rule_block, 0, sizeof(ruletab) * sizeof(INSTRUCTION *)); /* NOTE(review): sizeof(ruletab) is a byte size, not an element count -- confirm it matches the declared size of rule_block */
+ errcount = 0;
+ tok = tokstart != NULL ? tokstart : tokexpand(); /* tokexpand() is called only when tokstart is still NULL */
+
+ ret = yyparse();
+ *pcode = mk_program();
+
+ /* avoid false source indications */
+ source = NULL;
+ sourceline = 0;
+
+ check_funcs();
+ return (ret || errcount);
+}
+
+/* do_add_srcfile --- add one item to srcfiles
+ *
+ * Allocates a zero-filled SRCFILE, stores a copy of src (via estrdup)
+ * and the path pointer itself (not a copy), sets stype, marks the fd as
+ * INVALID_HANDLE, and links the new entry into the doubly linked list
+ * immediately before thisfile. thisfile and thisfile->prev are
+ * dereferenced, so both must be non-NULL. Returns the new entry.
+ */
+
+static SRCFILE *
+do_add_srcfile(int stype, char *src, char *path, SRCFILE *thisfile)
+{
+ SRCFILE *s;
+
+ emalloc(s, SRCFILE *, sizeof(SRCFILE), "do_add_srcfile");
+ memset(s, 0, sizeof(SRCFILE));
+ s->src = estrdup(src, strlen(src));
+ s->fullpath = path; /* pointer is stored as given */
+ s->stype = stype;
+ s->fd = INVALID_HANDLE;
+ s->next = thisfile; /* insert s between thisfile->prev and thisfile */
+ s->prev = thisfile->prev;
+ thisfile->prev->next = s;
+ thisfile->prev = s;
+ return s;
+}
+
+/* add_srcfile --- add one item to srcfiles after checking if
+ * a source file exists and not already in list.
+ *
+ * already_included and errcode are optional output parameters; each is
+ * written only when non-NULL, and both are cleared on entry.
+ *
+ * SRC_CMDLINE and SRC_STDIN entries are added directly with a NULL path.
+ * For other types the file is located with find_source(). If that fails
+ * and errcode is non-NULL, the error number is stored there and NULL is
+ * returned; if errcode is NULL, fatal() is called instead.
+ *
+ * If an existing SRC_FILE or SRC_INC entry refers to the same file
+ * (per files_are_same), the path is freed, *already_included is set to
+ * TRUE and NULL is returned. Otherwise the new entry is inserted before
+ * thisfile, its sbuf and mtime are recorded, and it is returned.
+ */
+
+SRCFILE *
+add_srcfile(int stype, char *src, SRCFILE *thisfile, int *already_included, int *errcode)
+{
+ SRCFILE *s;
+ struct stat sbuf;
+ char *path;
+ int errno_val = 0;
+
+ if (already_included)
+ *already_included = FALSE;
+ if (errcode)
+ *errcode = 0;
+ if (stype == SRC_CMDLINE || stype == SRC_STDIN)
+ return do_add_srcfile(stype, src, NULL, thisfile);
+
+ path = find_source(src, &sbuf, &errno_val);
+ if (path == NULL) {
+ if (errcode) { /* caller asked for the error code: report instead of aborting */
+ *errcode = errno_val;
+ return NULL;
+ }
+ fatal(_("can't open source file `%s' for reading (%s)"),
+ src, errno_val ? strerror(errno_val) : _("reason unknown"));
+ }
+
+ for (s = srcfiles->next; s != srcfiles; s = s->next) { /* walk the list until it returns to srcfiles */
+ if ((s->stype == SRC_FILE || s->stype == SRC_INC)
+ && files_are_same(& sbuf, & s->sbuf)
+ ) {
+ if (do_lint)
+ lintwarn(_("already included source file `%s'"), src);
+ efree(path); /* path is not stored anywhere on this return */
+ if (already_included)
+ *already_included = TRUE;
+ return NULL;
+ }
+ }
+
+ s = do_add_srcfile(stype, src, path, thisfile);
+ s->sbuf = sbuf;
+ s->mtime = sbuf.st_mtime;
+ return s;
+}
+
+/* include_source --- read program from source included using `@include'
+ *
+ * Returns -1 (after incrementing errcount) when do_traditional or
+ * do_posix is set, or when the file cannot be added; returns 0 otherwise.
+ * An empty file name and a file that add_srcfile() reports as already
+ * included both return 0 without switching sources.
+ *
+ * On success the scanner state is saved in the current sourcefile entry,
+ * the new entry becomes sourcefile, and the scanner variables are reset
+ * so that reading starts from the new entry.
+ */
+
+static int
+include_source(char *src)
+{
+ SRCFILE *s;
+ int errcode;
+ int already_included;
+
+ if (do_traditional || do_posix) {
+ error(_("@include is a gawk extension"));
+ errcount++;
+ return -1;
+ }
+
+ if (strlen(src) == 0) {
+ if (do_lint)
+ lintwarn(_("empty filename after @include"));
+ return 0;
+ }
+
+ s = add_srcfile(SRC_INC, src, sourcefile, &already_included, &errcode); /* new entry is linked before the current sourcefile */
+ if (s == NULL) {
+ if (already_included)
+ return 0;
+ error(_("can't open source file `%s' for reading (%s)"),
+ src, errcode ? strerror(errcode) : _("reason unknown"));
+ errcount++;
+ return -1;
+ }
+
+ /* save scanner state for the current sourcefile */
+ sourcefile->srclines = sourceline;
+ sourcefile->lexptr = lexptr;
+ sourcefile->lexend = lexend;
+ sourcefile->lexptr_begin = lexptr_begin;
+ sourcefile->lexeme = lexeme;
+ sourcefile->lasttok = lasttok;
+
+ /* included file becomes the current source */
+ sourcefile = s;
+ lexptr = NULL;
+ sourceline = 0;
+ source = NULL;
+ lasttok = 0;
+ lexeof = FALSE;
+ eof_warned = FALSE;
+ return 0;
+}
+
+/* next_sourcefile --- read program from the next source in srcfiles
+ *
+ * Must be called with lexeof set (asserted). Clears lexeof and
+ * eof_warned, records the line count of the current entry, closes its
+ * file descriptor (unless it is the descriptor of stdin), frees its
+ * buffer, and advances sourcefile to the next entry.
+ *
+ * If the next entry is srcfiles itself, the function returns without
+ * touching the scanner variables. If the next entry has a non-NULL
+ * lexptr_begin, its saved scanner state is restored; otherwise the
+ * scanner variables are reset.
+ *
+ * The close function is selected once, on first use: close() when the
+ * environment variable AWKREADFUNC is unset, one_line_close otherwise.
+ */
+
+static void
+next_sourcefile()
+{
+ static int (*closefunc)(int fd) = NULL;
+
+ if (closefunc == NULL) {
+ char *cp = getenv("AWKREADFUNC");
+
+ /* If necessary, one day, test value for different functions. */
+ if (cp == NULL)
+ closefunc = close;
+ else
+ closefunc = one_line_close;
+ }
+
+ assert(lexeof == TRUE);
+ lexeof = FALSE;
+ eof_warned = FALSE;
+ sourcefile->srclines = sourceline; /* total no of lines in current file */
+ if (sourcefile->fd > INVALID_HANDLE) {
+ if (sourcefile->fd != fileno(stdin)) /* safety */
+ (*closefunc)(sourcefile->fd);
+ sourcefile->fd = INVALID_HANDLE;
+ }
+ if (sourcefile->buf != NULL) {
+ efree(sourcefile->buf);
+ sourcefile->buf = NULL;
+ sourcefile->lexptr_begin = NULL; /* cleared only when a buffer was freed */
+ }
+
+ sourcefile = sourcefile->next;
+ if (sourcefile == srcfiles) /* scanner variables are left unchanged on this return */
+ return;
+
+ if (sourcefile->lexptr_begin != NULL) {
+ /* resume reading from already opened file (postponed to process '@include') */
+ lexptr = sourcefile->lexptr;
+ lexend = sourcefile->lexend;
+ lasttok = sourcefile->lasttok;
+ lexptr_begin = sourcefile->lexptr_begin;
+ lexeme = sourcefile->lexeme;
+ sourceline = sourcefile->srclines;
+ source = sourcefile->src;
+ } else {
+ lexptr = NULL;
+ sourceline = 0;
+ source = NULL;
+ lasttok = 0;
+ }
}
/* get_src_buf --- read the next buffer of source program */
@@ -1392,11 +2376,11 @@ static void
static char *
get_src_buf()
{
- static int samefile = FALSE;
- static int nextfile = 0;
- static char *buf = NULL;
- static size_t buflen = 0;
- static int fd;
+ int n;
+ char *scan;
+ int newfile;
+ int savelen;
+ struct stat sbuf;
/*
* No argument prototype on readfunc on purpose,
@@ -1404,105 +2388,94 @@ get_src_buf()
* the types of arguments to read() aren't up to date.
*/
static ssize_t (*readfunc)() = 0;
- static int (*closefunc)P((int fd)) = NULL;
-
- int n;
- register char *scan;
- int newfile;
- struct stat sbuf;
- int readcount = 0;
- int l;
- char *readloc;
if (readfunc == NULL) {
char *cp = getenv("AWKREADFUNC");
/* If necessary, one day, test value for different functions. */
- if (cp == NULL) {
+ if (cp == NULL)
readfunc = read;
- closefunc = close;
- } else {
+ else
readfunc = read_one_line;
- closefunc = one_line_close;
- }
}
-again:
newfile = FALSE;
- if (nextfile > numfiles)
+ if (sourcefile == srcfiles)
return NULL;
- if (srcfiles[nextfile].stype == CMDLINE) {
- if ((l = strlen(srcfiles[nextfile].val)) == 0) {
+ if (sourcefile->stype == SRC_CMDLINE) {
+ if (sourcefile->bufsize == 0) {
+ sourcefile->bufsize = strlen(sourcefile->src);
+ lexptr = lexptr_begin = lexeme = sourcefile->src;
+ lexend = lexptr + sourcefile->bufsize;
+ sourceline = 1;
+ if (sourcefile->bufsize == 0) {
+ /*
+ * Yet Another Special case:
+ * gawk '' /path/name
+ * Sigh.
+ */
+ static short warned = FALSE;
+
+ if (do_lint && ! warned) {
+ warned = TRUE;
+ lintwarn(_("empty program text on command line"));
+ }
+ lexeof = TRUE;
+ }
+ } else if (sourcefile->buf == NULL && *(lexptr-1) != '\n') {
/*
- * Yet Another Special case:
- * gawk '' /path/name
- * Sigh.
+ * The following goop is to ensure that the source
+ * ends with a newline and that the entire current
+ * line is available for error messages.
*/
- static short warned = FALSE;
+ int offset;
+ char *buf;
- if (do_lint && ! warned) {
- warned = TRUE;
- lintwarn(_("empty program text on command line"));
- }
- ++nextfile;
- goto again;
- }
- if (srcfiles[nextfile].val[l-1] == '\n') {
- /* has terminating newline, can use it directly */
- sourceline = 1;
- source = NULL;
- lexptr = lexptr_begin = srcfiles[nextfile].val;
- /* fall through to pointer adjustment and return, below */
- } else {
- /* copy it into static buffer */
-
- /* make sure buffer exists and has room */
- if (buflen == 0) {
- emalloc(buf, char *, l+2, "get_src_buf");
- buflen = l + 2;
- } else if (l+2 > buflen) {
- erealloc(buf, char *, l+2, "get_src_buf");
- buflen = l + 2;
- } /* else
- buffer has room, just use it */
-
- /* copy in data */
- memcpy(buf, srcfiles[nextfile].val, l);
- buf[l] = '\n';
- buf[++l] = '\0';
-
- /* set vars and return */
- sourceline = 0;
- source = NULL;
- lexptr = lexptr_begin = buf;
- }
- lexend = lexptr + l;
- nextfile++; /* for next entry to this routine */
+ offset = lexptr - lexeme;
+ for (scan = lexeme; scan > lexptr_begin; scan--)
+ if (*scan == '\n') {
+ scan++;
+ break;
+ }
+ savelen = lexptr - scan;
+ emalloc(buf, char *, savelen + 1, "get_src_buf");
+ memcpy(buf, scan, savelen);
+ thisline = buf;
+ lexptr = buf + savelen;
+ *lexptr = '\n';
+ lexeme = lexptr - offset;
+ lexptr_begin = buf;
+ lexend = lexptr + 1;
+ sourcefile->buf = buf;
+ } else
+ lexeof = TRUE;
return lexptr;
}
- if (! samefile) {
- source = srcfiles[nextfile].val;
- if (source == NULL) { /* read all the source files, all done */
- if (buf != NULL) {
- free(buf);
- buf = NULL;
- }
- buflen = 0;
- return lexeme = lexptr = lexptr_begin = NULL;
- }
- fd = pathopen(source);
+ if (sourcefile->fd <= INVALID_HANDLE) {
+ int fd;
+ int l;
+
+ source = sourcefile->src;
+ if (source == NULL)
+ return NULL;
+ fd = srcopen(sourcefile);
if (fd <= INVALID_HANDLE) {
char *in;
/* suppress file name and line no. in error mesg */
in = source;
source = NULL;
- fatal(_("can't open source file `%s' for reading (%s)"),
+ error(_("can't open source file `%s' for reading (%s)"),
in, strerror(errno));
+ errcount++;
+ lexeof = TRUE;
+ return sourcefile->src;
}
- l = optimal_bufsize(fd, & sbuf);
+
+ sourcefile->fd = fd;
+ l = optimal_bufsize(fd, &sbuf);
/*
* Make sure that something silly like
* AWKBUFSIZE=8 make check
@@ -1512,110 +2485,71 @@ again:
if (l < A_DECENT_BUFFER_SIZE)
l = A_DECENT_BUFFER_SIZE;
#undef A_DECENT_BUFFER_SIZE
-
+ sourcefile->bufsize = l;
newfile = TRUE;
-
- /* make sure buffer exists and has room */
- if (buflen == 0) {
- emalloc(buf, char *, l+2, "get_src_buf");
- buflen = l + 2;
- } else if (l+2 > buflen) {
- erealloc(buf, char *, l+2, "get_src_buf");
- buflen = l + 2;
- } /* else
- buffer has room, just use it */
-
- readcount = l;
- readloc = lexeme = lexptr = lexptr_begin = buf;
- samefile = TRUE;
+ emalloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf");
+ lexptr = lexptr_begin = lexeme = sourcefile->buf;
+ savelen = 0;
sourceline = 1;
+ thisline = NULL;
} else {
/*
- * In same file, ran off edge of buffer.
- * Shift current line down to front, adjust
- * pointers and fill in the rest of the buffer.
+ * Here, we retain the current source line in the beginning of the buffer.
*/
-
- int lexeme_offset = lexeme - lexptr_begin;
- int lexptr_offset = lexptr - lexptr_begin;
- int lexend_offset = lexend - lexptr_begin;
-
- /* find beginning of current line */
- for (scan = lexeme; scan >= lexptr_begin; scan--) {
+ int offset;
+ for (scan = lexeme; scan > lexptr_begin; scan--)
if (*scan == '\n') {
scan++;
break;
}
- }
- /*
- * This condition can be read as follows: IF
- * 1. The beginning of the line is at the beginning of the
- * buffer (no newline was found: scan <= buf)
- * AND:
- * 2. The start of valid lexical data is into the buffer
- * (lexptr_begin > buf)
- * OR:
- * 3. We have scanned past the end of the last data read
- * (lexptr == lexend)
- * AND:
- * 4. There's no room left in the buffer
- * (lexptr_offset >= buflen - 2)
- *
- * If all that's true, grow the buffer to add more to
- * the current line.
- */
+ savelen = lexptr - scan;
+ offset = lexptr - lexeme;
- if (scan <= buf
- && (lexptr_begin > buf
- || (lexptr == lexend
- && lexptr_offset >= buflen - 2))) {
- /* have to grow the buffer */
- buflen *= 2;
- erealloc(buf, char *, buflen, "get_src_buf");
- } else if (scan > buf) {
- /* Line starts in middle of the buffer, shift things down. */
- memmove(buf, scan, lexend - scan);
+ if (savelen > 0) {
/*
- * make offsets relative to start of line,
- * not start of buffer.
+ * Need to make sure we have room left for reading new text;
+ * grow the buffer (by doubling, an arbitrary choice), if the retained line
+ * takes up more than a certain percentage (50%, again an arbitrary figure)
+ * of the available space.
*/
- lexend_offset = lexend - scan;
- lexeme_offset = lexeme - scan;
- lexptr_offset = lexptr - scan;
- }
-
- /* adjust pointers */
- lexeme = buf + lexeme_offset;
- lexptr = buf + lexptr_offset;
- lexend = buf + lexend_offset;
- lexptr_begin = buf;
- readcount = buflen - (lexend - buf);
- readloc = lexend;
- }
-
- /* add more data to buffer */
- n = (*readfunc)(fd, readloc, readcount);
- if (n == -1)
- fatal(_("can't read sourcefile `%s' (%s)"),
- source, strerror(errno));
- if (n == 0) {
- if (newfile) {
- static short warned = FALSE;
- if (do_lint && ! warned) {
+ if (savelen > sourcefile->bufsize / 2) { /* long line or token */
+ sourcefile->bufsize *= 2;
+ erealloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf");
+ scan = sourcefile->buf + (scan - lexptr_begin);
+ lexptr_begin = sourcefile->buf;
+ }
+
+ thisline = lexptr_begin;
+ memmove(thisline, scan, savelen);
+ lexptr = thisline + savelen;
+ lexeme = lexptr - offset;
+ } else {
+ savelen = 0;
+ lexptr = lexeme = lexptr_begin;
+ thisline = NULL;
+ }
+ }
+
+ n = (*readfunc)(sourcefile->fd, lexptr, sourcefile->bufsize - savelen);
+ if (n == -1) {
+ error(_("can't read sourcefile `%s' (%s)"),
+ source, strerror(errno));
+ errcount++;
+ lexeof = TRUE;
+ } else {
+ lexend = lexptr + n;
+ if (n == 0) {
+ static short warned = FALSE;
+ if (do_lint && newfile && ! warned){
warned = TRUE;
lintwarn(_("source file `%s' is empty"), source);
}
+ lexeof = TRUE;
}
- if (fd != fileno(stdin)) /* safety */
- (*closefunc)(fd);
- samefile = FALSE;
- nextfile++;
- goto again;
}
- lexend = lexptr + n;
- return lexptr;
+ return sourcefile->buf;
}
/* tokadd --- add a character to the token buffer */
@@ -1624,20 +2558,23 @@ again:
/* tokexpand --- grow the token buffer
 *
 * With the added lines of this change: when tokstart is NULL the buffer
 * is first allocated with 60 bytes and tok is set to its start; otherwise
 * the size is doubled, the buffer is reallocated, and tok is repositioned
 * at the same offset in the new buffer. tokend is set to one past the end
 * of the buffer in both cases. Returns tok.
 */
-char *
+static char *
tokexpand()
{
- static int toksize = 60;
+ static int toksize;
int tokoffset;
-
- tokoffset = tok - tokstart;
- toksize *= 2;
- if (tokstart != NULL)
+
+ if (tokstart != NULL) {
+ tokoffset = tok - tokstart; /* remember position before the buffer is reallocated */
+ toksize *= 2;
erealloc(tokstart, char *, toksize, "tokexpand");
- else
+ tok = tokstart + tokoffset;
+ } else {
+ toksize = 60; /* initial buffer size */
emalloc(tokstart, char *, toksize, "tokexpand");
+ tok = tokstart;
+ }
tokend = tokstart + toksize;
- tok = tokstart + tokoffset;
return tok;
}
@@ -1649,9 +2586,13 @@ static int
nextc(void)
{
if (gawk_mb_cur_max > 1) {
- if (!lexptr || lexptr >= lexend) {
- if (! get_src_buf())
- return EOF;
+again:
+ if (lexeof)
+ return END_FILE;
+ if (lexptr == NULL || lexptr >= lexend) {
+ if (get_src_buf())
+ goto again;
+ return END_SRC;
}
/* Update the buffer index. */
@@ -1664,7 +2605,7 @@ nextc(void)
int idx, work_ring_idx = cur_ring_idx;
mbstate_t tmp_state;
size_t mbclen;
-
+
for (idx = 0 ; lexptr + idx < lexend ; idx++) {
tmp_state = cur_mbstate;
mbclen = mbrlen(lexptr, idx + 1, &tmp_state);
@@ -1695,44 +2636,30 @@ nextc(void)
}
return (int) (unsigned char) *lexptr++;
- }
- else {
- int c;
-
- if (lexptr && lexptr < lexend)
- c = (int) (unsigned char) *lexptr++;
- else if (get_src_buf())
- c = (int) (unsigned char) *lexptr++;
- else
- c = EOF;
-
- return c;
+ } else {
+ do {
+ if (lexeof)
+ return END_FILE;
+ if (lexptr && lexptr < lexend)
+ return ((int) (unsigned char) *lexptr++);
+ } while (get_src_buf());
+ return END_SRC;
}
}
#else /* MBS_SUPPORT */
-#if GAWKDEBUG
int
-nextc(void)
+nextc()
{
- int c;
-
- if (lexptr && lexptr < lexend)
- c = (int) (unsigned char) *lexptr++;
- else if (get_src_buf())
- c = (int) (unsigned char) *lexptr++;
- else
- c = EOF;
-
- return c;
+ do {
+ if (lexeof)
+ return END_FILE;
+ if (lexptr && lexptr < lexend)
+ return ((int) (unsigned char) *lexptr++);
+ } while (get_src_buf());
+ return END_SRC;
}
-#else
-#define nextc() ((lexptr && lexptr < lexend) ? \
- ((int) (unsigned char) *lexptr++) : \
- (get_src_buf() ? ((int) (unsigned char) *lexptr++) : EOF) \
- )
-#endif
#endif /* MBS_SUPPORT */
@@ -1746,7 +2673,7 @@ pushback(void)
cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 :
cur_ring_idx - 1;
#endif
- (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
+ (! lexeof && lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
}
@@ -1759,13 +2686,17 @@ allow_newline(void)
for (;;) {
c = nextc();
- if (c == EOF)
+ if (c == END_FILE) {
+ pushback();
break;
+ }
if (c == '#') {
- while ((c = nextc()) != '\n' && c != EOF)
+ while ((c = nextc()) != '\n' && c != END_FILE)
continue;
- if (c == EOF)
+ if (c == END_FILE) {
+ pushback();
break;
+ }
}
if (c == '\n')
sourceline++;
@@ -1781,30 +2712,44 @@ allow_newline(void)
static int
yylex(void)
{
- register int c;
+ int c;
int seen_e = FALSE; /* These are for numbers */
int seen_point = FALSE;
int esc_seen; /* for literal strings */
int mid;
static int did_newline = FALSE;
char *tokkey;
- static int lasttok = 0;
- static short eof_warned = FALSE;
int inhex = FALSE;
int intlstr = FALSE;
- if (nextc() == EOF) {
- if (lasttok != NEWLINE) {
- lasttok = NEWLINE;
- if (do_lint && ! eof_warned) {
- lintwarn(_("source file does not end in newline"));
- eof_warned = TRUE;
- }
- return NEWLINE; /* fake it */
- }
- return 0;
+#define GET_INSTRUCTION(op) bcalloc(op, 1, sourceline)
+
+ /* NB: a newline at end does not start a source line. */
+
+#define NEWLINE_EOF \
+ (lasttok != NEWLINE ? \
+ (pushback(), do_lint && ! eof_warned && \
+ (lintwarn(_("source file does not end in newline")), \
+ eof_warned = TRUE), sourceline++, NEWLINE) : \
+ (sourceline--, eof_warned = FALSE, LEX_EOF))
+
+
+ yylval = (INSTRUCTION *) NULL;
+ if (lasttok == SUBSCRIPT) {
+ lasttok = 0;
+ return SUBSCRIPT;
}
+
+ if (lasttok == LEX_EOF) /* error earlier in current source, must give up !! */
+ return 0;
+
+ c = nextc();
+ if (c == END_SRC)
+ return 0;
+ if (c == END_FILE)
+ return lasttok = NEWLINE_EOF;
pushback();
+
#if defined OS2 || defined __EMX__
/*
* added for OS/2's extproc feature of cmd.exe
@@ -1815,6 +2760,7 @@ yylex(void)
lexptr++;
}
#endif
+
lexeme = lexptr;
thisline = NULL;
if (want_regexp) {
@@ -1858,7 +2804,8 @@ yylex(void)
in_brack--;
break;
case '\\':
- if ((c = nextc()) == EOF) {
+ if ((c = nextc()) == END_FILE) {
+ pushback();
yyerror(_("unterminated regexp ends with `\\' at end of file"));
goto end_regexp; /* kludge */
} else if (c == '\n') {
@@ -1874,8 +2821,8 @@ yylex(void)
if (in_brack > 0)
break;
end_regexp:
- tokadd('\0');
- yylval.sval = tokstart;
+ yylval = GET_INSTRUCTION(Op_token);
+ yylval->lextok = estrdup(tokstart, tok - tokstart);
if (do_lint) {
int peek = nextc();
@@ -1896,7 +2843,8 @@ end_regexp:
pushback();
yyerror(_("unterminated regexp"));
goto end_regexp; /* kludge */
- case EOF:
+ case END_FILE:
+ pushback();
yyerror(_("unterminated regexp at end of file"));
goto end_regexp; /* kludge */
}
@@ -1912,42 +2860,32 @@ retry:
lexeme = lexptr ? lexptr - 1 : lexptr;
thisline = NULL;
tok = tokstart;
- yylval.nodetypeval = Node_illegal;
-
- if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
- case EOF:
- if (lasttok != NEWLINE) {
- lasttok = NEWLINE;
- if (do_lint && ! eof_warned) {
- lintwarn(_("source file does not end in newline"));
- eof_warned = TRUE;
- }
- return NEWLINE; /* fake it */
- }
+
+#ifdef MBS_SUPPORT
+ if (gawk_mb_cur_max == 1 || nextc_is_1stbyte)
+#endif
+ switch (c) {
+ case END_SRC:
return 0;
+ case END_FILE:
+ return lasttok = NEWLINE_EOF;
+
case '\n':
sourceline++;
return lasttok = NEWLINE;
case '#': /* it's a comment */
while ((c = nextc()) != '\n') {
- if (c == EOF) {
- if (lasttok != NEWLINE) {
- lasttok = NEWLINE;
- if (do_lint && ! eof_warned) {
- lintwarn(
- _("source file does not end in newline"));
- eof_warned = TRUE;
- }
- return NEWLINE; /* fake it */
- }
- return 0;
- }
+ if (c == END_FILE)
+ return lasttok = NEWLINE_EOF;
}
sourceline++;
return lasttok = NEWLINE;
+ case '@':
+ return lasttok = '@';
+
case '\\':
#ifdef RELAXED_CONTINUATION
/*
@@ -1969,23 +2907,27 @@ retry:
_("use of `\\ #...' line continuation is not portable"));
}
while ((c = nextc()) != '\n')
- if (c == EOF)
+ if (c == END_FILE)
break;
}
pushback();
}
#endif /* RELAXED_CONTINUATION */
- if (nextc() == '\n') {
+ c = nextc();
+ if (c == '\r') /* allow MS-DOS files. bleah */
+ c = nextc();
+ if (c == '\n') {
sourceline++;
goto retry;
} else {
yyerror(_("backslash not last character on line"));
- exit(EXIT_FAILURE);
+ return lasttok = LEX_EOF;
}
break;
case ':':
case '?':
+ yylval = GET_INSTRUCTION(Op_cond_exp);
if (! do_posix)
allow_newline();
return lasttok = c;
@@ -2000,21 +2942,36 @@ retry:
case '(':
in_parens++;
- /* FALL THROUGH */
+ return lasttok = c;
case '$':
- case ';':
+ yylval = GET_INSTRUCTION(Op_field_spec);
+ return lasttok = c;
case '{':
+ if (++in_braces == 1)
+ firstline = sourceline;
+ case ';':
case ',':
case '[':
+ return lasttok = c;
case ']':
- return lasttok = c;
+ c = nextc();
+ pushback();
+ if (c == '[') {
+ yylval = GET_INSTRUCTION(Op_sub_array);
+ lasttok = ']';
+ } else {
+ yylval = GET_INSTRUCTION(Op_subscript);
+ lasttok = SUBSCRIPT; /* end of subscripts */
+ }
+ return ']';
case '*':
if ((c = nextc()) == '=') {
- yylval.nodetypeval = Node_assign_times;
+ yylval = GET_INSTRUCTION(Op_assign_times);
return lasttok = ASSIGNOP;
} else if (do_posix) {
pushback();
+ yylval = GET_INSTRUCTION(Op_times);
return lasttok = '*';
} else if (c == '*') {
/* make ** and **= aliases for ^ and ^= */
@@ -2028,7 +2985,7 @@ retry:
if (do_lint_old)
warning(_("old awk does not support operator `**='"));
}
- yylval.nodetypeval = Node_assign_exp;
+ yylval = GET_INSTRUCTION(Op_assign_exp);
return ASSIGNOP;
} else {
pushback();
@@ -2039,10 +2996,12 @@ retry:
if (do_lint_old)
warning(_("old awk does not support operator `**'"));
}
+ yylval = GET_INSTRUCTION(Op_exp);
return lasttok = '^';
}
}
pushback();
+ yylval = GET_INSTRUCTION(Op_times);
return lasttok = '*';
case '/':
@@ -2051,14 +3010,16 @@ retry:
return lasttok = SLASH_BEFORE_EQUAL;
}
pushback();
+ yylval = GET_INSTRUCTION(Op_quotient);
return lasttok = '/';
case '%':
if (nextc() == '=') {
- yylval.nodetypeval = Node_assign_mod;
+ yylval = GET_INSTRUCTION(Op_assign_mod);
return lasttok = ASSIGNOP;
}
pushback();
+ yylval = GET_INSTRUCTION(Op_mod);
return lasttok = '%';
case '^':
@@ -2070,7 +3031,7 @@ retry:
did_warn_assgn = TRUE;
warning(_("operator `^=' is not supported in old awk"));
}
- yylval.nodetypeval = Node_assign_exp;
+ yylval = GET_INSTRUCTION(Op_assign_exp);
return lasttok = ASSIGNOP;
}
pushback();
@@ -2078,67 +3039,74 @@ retry:
did_warn_op = TRUE;
warning(_("operator `^' is not supported in old awk"));
}
+ yylval = GET_INSTRUCTION(Op_exp);
return lasttok = '^';
}
case '+':
if ((c = nextc()) == '=') {
- yylval.nodetypeval = Node_assign_plus;
+ yylval = GET_INSTRUCTION(Op_assign_plus);
return lasttok = ASSIGNOP;
}
- if (c == '+')
+ if (c == '+') {
+ yylval = GET_INSTRUCTION(Op_symbol);
return lasttok = INCREMENT;
+ }
pushback();
+ yylval = GET_INSTRUCTION(Op_plus);
return lasttok = '+';
case '!':
if ((c = nextc()) == '=') {
- yylval.nodetypeval = Node_notequal;
+ yylval = GET_INSTRUCTION(Op_notequal);
return lasttok = RELOP;
}
if (c == '~') {
- yylval.nodetypeval = Node_nomatch;
+ yylval = GET_INSTRUCTION(Op_nomatch);
return lasttok = MATCHOP;
}
pushback();
+ yylval = GET_INSTRUCTION(Op_symbol);
return lasttok = '!';
case '<':
if (nextc() == '=') {
- yylval.nodetypeval = Node_leq;
+ yylval = GET_INSTRUCTION(Op_leq);
return lasttok = RELOP;
}
- yylval.nodetypeval = Node_less;
+ yylval = GET_INSTRUCTION(Op_less);
pushback();
return lasttok = '<';
case '=':
if (nextc() == '=') {
- yylval.nodetypeval = Node_equal;
+ yylval = GET_INSTRUCTION(Op_equal);
return lasttok = RELOP;
}
- yylval.nodetypeval = Node_assign;
+ yylval = GET_INSTRUCTION(Op_assign);
pushback();
return lasttok = ASSIGN;
case '>':
if ((c = nextc()) == '=') {
- yylval.nodetypeval = Node_geq;
+ yylval = GET_INSTRUCTION(Op_geq);
return lasttok = RELOP;
} else if (c == '>') {
- yylval.nodetypeval = Node_redirect_append;
+ yylval = GET_INSTRUCTION(Op_symbol);
+ yylval->redir_type = redirect_append;
return lasttok = IO_OUT;
}
pushback();
if (in_print && in_parens == 0) {
- yylval.nodetypeval = Node_redirect_output;
+ yylval = GET_INSTRUCTION(Op_symbol);
+ yylval->redir_type = redirect_output;
return lasttok = IO_OUT;
}
- yylval.nodetypeval = Node_greater;
+ yylval = GET_INSTRUCTION(Op_greater);
return lasttok = '>';
case '~':
- yylval.nodetypeval = Node_match;
+ yylval = GET_INSTRUCTION(Op_match);
return lasttok = MATCHOP;
case '}':
@@ -2148,6 +3116,8 @@ retry:
*/
if (did_newline) {
did_newline = FALSE;
+ if (--in_braces == 0)
+ lastline = sourceline;
return lasttok = c;
}
did_newline++;
@@ -2161,7 +3131,7 @@ retry:
if (c == '\n') {
pushback();
yyerror(_("unterminated string"));
- exit(EXIT_FAILURE);
+ return lasttok = LEX_EOF;
}
if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) &&
c == '\\') {
@@ -2171,35 +3141,46 @@ retry:
continue;
}
esc_seen = TRUE;
- tokadd('\\');
+ if (! want_source || c != '"')
+ tokadd('\\');
}
- if (c == EOF) {
+ if (c == END_FILE) {
pushback();
yyerror(_("unterminated string"));
- exit(EXIT_FAILURE);
+ return lasttok = LEX_EOF;
}
tokadd(c);
}
- yylval.nodeval = make_str_node(tokstart,
- tok - tokstart, esc_seen ? SCAN : 0);
- yylval.nodeval->flags |= PERM;
+ yylval = GET_INSTRUCTION(Op_token);
+ if (want_source) {
+ yylval->lextok = estrdup(tokstart, tok - tokstart);
+ return lasttok = FILENAME;
+ }
+
+ yylval->opcode = Op_push_i;
+ yylval->memory = make_str_node(tokstart,
+ tok - tokstart, esc_seen ? SCAN : 0);
+ yylval->memory->flags &= ~MALLOC;
+ yylval->memory->flags |= PERM;
if (intlstr) {
- yylval.nodeval->flags |= INTLSTR;
+ yylval->memory->flags |= INTLSTR;
intlstr = FALSE;
if (do_intl)
- dumpintlstr(yylval.nodeval->stptr,
- yylval.nodeval->stlen);
- }
+ dumpintlstr(yylval->memory->stptr, yylval->memory->stlen);
+ }
return lasttok = YSTRING;
case '-':
if ((c = nextc()) == '=') {
- yylval.nodetypeval = Node_assign_minus;
+ yylval = GET_INSTRUCTION(Op_assign_minus);
return lasttok = ASSIGNOP;
}
- if (c == '-')
+ if (c == '-') {
+ yylval = GET_INSTRUCTION(Op_symbol);
return lasttok = DECREMENT;
+ }
pushback();
+ yylval = GET_INSTRUCTION(Op_minus);
return lasttok = '-';
case '.':
@@ -2309,13 +3290,10 @@ retry:
break;
c = nextc();
}
- if (c != EOF)
- pushback();
- else if (do_lint && ! eof_warned) {
- lintwarn(_("source file does not end in newline"));
- eof_warned = TRUE;
- }
+ pushback();
+
tokadd('\0');
+ yylval = GET_INSTRUCTION(Op_push_i);
if (! do_traditional && isnondecimal(tokstart, FALSE)) {
if (do_lint) {
if (isdigit(tokstart[1])) /* not an 'x' or 'X' */
@@ -2325,48 +3303,47 @@ retry:
lintwarn("numeric constant `%.*s' treated as hexadecimal",
(int) strlen(tokstart)-1, tokstart);
}
- yylval.nodeval = make_number(nondec2awknum(tokstart, strlen(tokstart)));
+ yylval->memory = mk_number(nondec2awknum(tokstart, strlen(tokstart)),
+ PERM|NUMCUR|NUMBER);
} else
- yylval.nodeval = make_number(atof(tokstart));
- yylval.nodeval->flags |= PERM;
+ yylval->memory = mk_number(atof(tokstart), PERM|NUMCUR|NUMBER);
return lasttok = YNUMBER;
case '&':
if ((c = nextc()) == '&') {
- yylval.nodetypeval = Node_and;
+ yylval = GET_INSTRUCTION(Op_and);
allow_newline();
return lasttok = LEX_AND;
}
pushback();
+ yylval = GET_INSTRUCTION(Op_symbol);
return lasttok = '&';
case '|':
if ((c = nextc()) == '|') {
- yylval.nodetypeval = Node_or;
+ yylval = GET_INSTRUCTION(Op_or);
allow_newline();
return lasttok = LEX_OR;
} else if (! do_traditional && c == '&') {
- yylval.nodetypeval = Node_redirect_twoway;
+ yylval = GET_INSTRUCTION(Op_symbol);
+ yylval->redir_type = redirect_twoway;
return lasttok = (in_print && in_parens == 0 ? IO_OUT : IO_IN);
}
pushback();
if (in_print && in_parens == 0) {
- yylval.nodetypeval = Node_redirect_pipe;
+ yylval = GET_INSTRUCTION(Op_symbol);
+ yylval->redir_type = redirect_pipe;
return lasttok = IO_OUT;
} else {
- yylval.nodetypeval = Node_redirect_pipein;
+ yylval = GET_INSTRUCTION(Op_symbol);
+ yylval->redir_type = redirect_pipein;
return lasttok = IO_IN;
}
-
- case '@': /* indirect function call */
- if (do_posix || do_traditional)
- break;
- return lasttok = c;
}
if (c != '_' && ! isalpha(c)) {
yyerror(_("invalid char '%c' in expression"), c);
- exit(EXIT_FAILURE);
+ return lasttok = LEX_EOF;
}
/*
@@ -2394,23 +3371,21 @@ retry:
/* it's some type of name-type-thing. Find its length. */
tok = tokstart;
- while (is_identchar(c)) {
+ while (c != END_FILE && is_identchar(c)) {
tokadd(c);
c = nextc();
}
tokadd('\0');
- emalloc(tokkey, char *, tok - tokstart, "yylex");
- memcpy(tokkey, tokstart, tok - tokstart);
- if (c != EOF)
- pushback();
- else if (do_lint && ! eof_warned) {
- lintwarn(_("source file does not end in newline"));
- eof_warned = TRUE;
- }
+ pushback();
/* See if it is a special token. */
-
if ((mid = check_special(tokstart)) >= 0) {
+ int class = tokentab[mid].class;
+
+ if ((class == LEX_INCLUDE || class == LEX_EVAL)
+ && lasttok != '@')
+ goto out;
+
if (do_lint) {
if (tokentab[mid].flags & GAWKX)
lintwarn(_("`%s' is a gawk extension"),
@@ -2425,26 +3400,64 @@ retry:
if (do_lint_old && (tokentab[mid].flags & NOT_OLD))
warning(_("`%s' is not supported in old awk"),
tokentab[mid].operator);
+ if (tokentab[mid].flags & BREAK)
+ break_allowed++;
+ if (tokentab[mid].flags & CONTINUE)
+ continue_allowed++;
if ((do_traditional && (tokentab[mid].flags & GAWKX))
|| (do_posix && (tokentab[mid].flags & NOT_POSIX)))
- ;
- else {
- if (tokentab[mid].class == LEX_BUILTIN
- || tokentab[mid].class == LEX_LENGTH)
- yylval.lval = mid;
- else
- yylval.nodetypeval = tokentab[mid].value;
- free(tokkey);
- return lasttok = tokentab[mid].class;
+ goto out;
+
+ switch (class) {
+ case LEX_INCLUDE:
+ want_source = TRUE;
+ break;
+ case LEX_EVAL:
+ if (get_context()->level == 0)
+ goto out;
+ emalloc(tokkey, char *, tok - tokstart + 1, "yylex");
+ tokkey[0] = '@';
+ memcpy(tokkey + 1, tokstart, tok - tokstart);
+ yylval = GET_INSTRUCTION(Op_token);
+ yylval->lextok = tokkey;
+ break;
+
+ case LEX_FUNCTION:
+ case LEX_BEGIN:
+ case LEX_END:
+ case LEX_BEGINFILE:
+ case LEX_ENDFILE:
+ yylval = bcalloc(tokentab[mid].value, 3, sourceline);
+ break;
+
+ case LEX_WHILE:
+ case LEX_DO:
+ case LEX_FOR:
+ case LEX_SWITCH:
+ yylval = bcalloc(tokentab[mid].value,
+ !!do_profiling + 1, sourceline);
+ break;
+
+ default:
+ yylval = GET_INSTRUCTION(tokentab[mid].value);
+ if (class == LEX_BUILTIN || class == LEX_LENGTH)
+ yylval->builtin_idx = mid;
+ break;
}
+ return lasttok = class;
}
-
- yylval.sval = tokkey;
- if (*lexptr == '(')
+out:
+ tokkey = estrdup(tokstart, tok - tokstart);
+ if (*lexptr == '(') {
+ yylval = bcalloc(Op_token, 2, sourceline);
+ yylval->lextok = tokkey;
return lasttok = FUNC_CALL;
- else {
+ } else {
static short goto_warned = FALSE;
+ yylval = GET_INSTRUCTION(Op_token);
+ yylval->lextok = tokkey;
+
#define SMART_ALECK 1
if (SMART_ALECK && do_lint
&& ! goto_warned && strcasecmp(tokkey, "goto") == 0) {
@@ -2453,147 +3466,215 @@ retry:
}
return lasttok = NAME;
}
-}
-
-/* node_common --- common code for allocating a new node */
-
-static NODE *
-node_common(NODETYPE op)
-{
- register NODE *r;
- getnode(r);
- r->type = op;
- r->flags = MALLOC;
- /* if lookahead is a NL, lineno is 1 too high */
- if (lexeme && lexeme >= lexptr_begin && *lexeme == '\n')
- r->source_line = sourceline - 1;
- else
- r->source_line = sourceline;
- r->source_file = source;
- return r;
+#undef GET_INSTRUCTION
+#undef NEWLINE_EOF
}
-/* node --- allocates a node with defined lnode and rnode. */
+/* mk_symbol --- allocates a symbol for the symbol table. */
NODE *
-node(NODE *left, NODETYPE op, NODE *right)
+mk_symbol(NODETYPE type, NODE *value)
{
- register NODE *r;
+ NODE *r;
- r = node_common(op);
- r->lnode = left;
- r->rnode = right;
+ getnode(r);
+ r->type = type;
+ r->flags = MALLOC;
+ r->lnode = value;
+ r->rnode = NULL;
+ r->var_assign = (Func_ptr) 0;
return r;
}
-/* snode --- allocate a node with defined subnode and builtin for builtin
- functions. Checks for arg. count and supplies defaults where
- possible. */
+/* snode --- instructions for builtin functions. Checks for arg. count
+ and supplies defaults where possible. */
-static NODE *
-snode(NODE *subn, NODETYPE op, int idx)
+static INSTRUCTION *
+snode(INSTRUCTION *subn, INSTRUCTION *r)
{
- register NODE *r;
- register NODE *n;
+ INSTRUCTION *arg;
+ INSTRUCTION *ip;
+ NODE *n;
int nexp = 0;
int args_allowed;
+ int idx = r->builtin_idx;
+
+ if (subn != NULL) {
+ INSTRUCTION *tp;
+ for (tp = subn->nexti; tp; tp = tp->nexti) {
+ /* assert(tp->opcode == Op_list); */
+ tp = tp->lasti;
+ nexp++;
+ }
+ assert(nexp > 0);
+ }
- r = node_common(op);
-
- /* traverse expression list to see how many args. given */
- for (n = subn; n != NULL; n = n->rnode) {
- nexp++;
- if (nexp > 5)
- break;
- }
+ r->builtin = tokentab[idx].ptr;
/* check against how many args. are allowed for this builtin */
args_allowed = tokentab[idx].flags & ARGS;
- if (args_allowed && (args_allowed & A(nexp)) == 0)
- fatal(_("%d is invalid as number of arguments for %s"),
+ if (args_allowed && (args_allowed & A(nexp)) == 0) {
+ yyerror(_("%d is invalid as number of arguments for %s"),
nexp, tokentab[idx].operator);
-
- r->builtin = tokentab[idx].ptr;
+ return NULL;
+ }
/* special case processing for a few builtins */
- if (nexp == 0 && r->builtin == do_length) {
- subn = node(node(make_number(0.0), Node_field_spec, (NODE *) NULL),
- Node_expression_list,
- (NODE *) NULL);
+ if (r->builtin == do_length) {
+ if (nexp == 0) {
+ INSTRUCTION *list; /* no args. Use $0 */
+
+ r->expr_count = 1;
+ list = list_create(r);
+ (void) list_prepend(list, instruction(Op_field_spec));
+ (void) list_prepend(list, instruction(Op_push_i));
+ list->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
+ return list;
+ }
} else if (r->builtin == do_match) {
static short warned = FALSE;
- if (subn->rnode->lnode->type != Node_regex)
- subn->rnode->lnode = mk_rexp(subn->rnode->lnode);
+ arg = subn->nexti->lasti->nexti; /* 2nd arg list */
+ (void) mk_rexp(arg);
- if (subn->rnode->rnode != NULL) { /* 3rd argument there */
+ if (nexp == 3) { /* 3rd argument there */
if (do_lint && ! warned) {
warned = TRUE;
lintwarn(_("match: third argument is a gawk extension"));
}
- if (do_traditional)
- fatal(_("match: third argument is a gawk extension"));
+ if (do_traditional) {
+ yyerror(_("match: third argument is a gawk extension"));
+ return NULL;
+ }
+
+ arg = arg->lasti->nexti; /* third arg list */
+ ip = arg->lasti;
+ if (/*ip == arg->nexti && */ ip->opcode == Op_push)
+ ip->opcode = Op_push_array;
}
} else if (r->builtin == do_sub || r->builtin == do_gsub) {
- if (subn->lnode->type != Node_regex)
- subn->lnode = mk_rexp(subn->lnode);
- if (nexp == 2)
- append_right(subn, node(node(make_number(0.0),
- Node_field_spec,
- (NODE *) NULL),
- Node_expression_list,
- (NODE *) NULL));
- else if (subn->rnode->rnode->lnode->type == Node_val) {
+ int literal = FALSE;
+
+ arg = subn->nexti; /* first arg list */
+ (void) mk_rexp(arg);
+
+ arg = arg->lasti->nexti; /* 2nd arg list */
+ if (nexp == 2) {
+ INSTRUCTION *expr;
+ expr = list_create(instruction(Op_push_i));
+ expr->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
+ (void) mk_expression_list(subn,
+ list_append(expr, instruction(Op_field_spec)));
+ }
+
+ arg = arg->lasti->nexti; /* third arg list */
+ ip = arg->lasti;
+ if (ip->opcode == Op_push_i) {
if (do_lint)
lintwarn(_("%s: string literal as last arg of substitute has no effect"),
(r->builtin == do_sub) ? "sub" : "gsub");
- } else if (! isassignable(subn->rnode->rnode->lnode)) {
- yyerror(_("%s third parameter is not a changeable object"),
- (r->builtin == do_sub) ? "sub" : "gsub");
+ literal = TRUE;
+ } else {
+ if (make_assignable(ip) == NULL)
+ yyerror(_("%s third parameter is not a changeable object"),
+ (r->builtin == do_sub) ? "sub" : "gsub");
+ else
+ ip->do_reference = TRUE;
+ }
+
+ /* kludge: This is one of the few cases
+ * when we need to know the type of item on stack.
+ * In case of string literal as the last argument,
+ * pass 4 as # of args (See sub_common code in builtin.c).
+ * Other cases like length(array or scalar) seem
+ * to work out ok.
+ */
+
+ r->expr_count = count_expressions(&subn, FALSE) + !!literal;
+ ip = subn->lasti;
+
+ (void) list_append(subn, r);
+
+ /* add after_assign bytecode(s) */
+ if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) {
+ (void) list_append(subn, instruction(Op_var_assign));
+ subn->lasti->memory = ip->memory;
+ } else if (ip->opcode == Op_field_spec_lhs) {
+ (void) list_append(subn, instruction(Op_field_assign));
+ subn->lasti->field_assign = (Func_ptr) 0;
+ ip->target_assign = subn->lasti;
}
+ return subn;
} else if (r->builtin == do_gensub) {
- if (subn->lnode->type != Node_regex)
- subn->lnode = mk_rexp(subn->lnode);
- if (nexp == 3)
- append_right(subn, node(node(make_number(0.0),
- Node_field_spec,
- (NODE *) NULL),
- Node_expression_list,
- (NODE *) NULL));
+ if (nexp == 3) {
+ arg = subn->nexti->lasti->nexti->lasti->nexti; /* 3rd arg list */
+ ip = instruction(Op_push_i);
+ ip->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
+ (void) mk_expression_list(subn,
+ list_append(list_create(ip),
+ instruction(Op_field_spec)));
+ }
+ arg = subn->nexti; /* first arg list */
+ (void) mk_rexp(arg);
} else if (r->builtin == do_split) {
+ arg = subn->nexti->lasti->nexti; /* 2nd arg list */
+ ip = arg->lasti;
+ if (ip->opcode == Op_push)
+ ip->opcode = Op_push_array;
+ if (nexp == 2) {
+ INSTRUCTION *expr;
+ expr = list_create(instruction(Op_push));
+ expr->nexti->memory = FS_node;
+ (void) mk_expression_list(subn, expr);
+ }
+ arg = arg->lasti->nexti;
+ n = mk_rexp(arg);
if (nexp == 2)
- append_right(subn,
- node(FS_node, Node_expression_list, (NODE *) NULL));
- n = subn->rnode->rnode->lnode;
- if (n->type != Node_regex)
- subn->rnode->rnode->lnode = mk_rexp(n);
- if (nexp == 2)
- subn->rnode->rnode->lnode->re_flags |= FS_DFLT;
+ n->re_flags |= FS_DFLT;
+ if (nexp == 4) {
+ arg = arg->lasti->nexti;
+ ip = arg->lasti;
+ if (ip->opcode == Op_push)
+ ip->opcode = Op_push_array;
+ }
} else if (r->builtin == do_patsplit) {
- if (nexp == 2)
- append_right(subn,
- node(FPAT_node, Node_expression_list, (NODE *) NULL));
- n = subn->rnode->rnode->lnode;
- if (n->type != Node_regex)
- subn->rnode->rnode->lnode = mk_rexp(n);
+ arg = subn->nexti->lasti->nexti; /* 2nd arg list */
+ ip = arg->lasti;
+ if (ip->opcode == Op_push)
+ ip->opcode = Op_push_array;
+ if (nexp == 2) {
+ INSTRUCTION *expr;
+ expr = list_create(instruction(Op_push));
+ expr->nexti->memory = FPAT_node;
+ (void) mk_expression_list(subn, expr);
+ }
+ arg = arg->lasti->nexti;
+ n = mk_rexp(arg);
+ if (nexp == 4) {
+ arg = arg->lasti->nexti;
+ ip = arg->lasti;
+ if (ip->opcode == Op_push)
+ ip->opcode = Op_push_array;
+ }
} else if (r->builtin == do_close) {
static short warned = FALSE;
-
- if ( nexp == 2) {
- if (do_lint && nexp == 2 && ! warned) {
+ if (nexp == 2) {
+ if (do_lint && ! warned) {
warned = TRUE;
lintwarn(_("close: second argument is a gawk extension"));
}
- if (do_traditional)
- fatal(_("close: second argument is a gawk extension"));
+ if (do_traditional) {
+ yyerror(_("close: second argument is a gawk extension"));
+ return NULL;
+ }
}
} else if (do_intl /* --gen-po */
&& r->builtin == do_dcgettext /* dcgettext(...) */
- && subn->lnode->type == Node_val /* 1st arg is constant */
- && (subn->lnode->flags & STRCUR) != 0) { /* it's a string constant */
+ && subn->nexti->lasti->opcode == Op_push_i /* 1st arg is constant */
+ && (subn->nexti->lasti->memory->flags & STRCUR) != 0) { /* it's a string constant */
/* ala xgettext, dcgettext("some string" ...) dumps the string */
- NODE *str = subn->lnode;
+ NODE *str = subn->nexti->lasti->memory;
if ((str->flags & INTLSTR) != 0)
warning(_("use of dcgettext(_\"...\") is incorrect: remove leading underscore"));
@@ -2602,45 +3683,65 @@ snode(NODE *subn, NODETYPE op, int idx)
dumpintlstr(str->stptr, str->stlen);
} else if (do_intl /* --gen-po */
&& r->builtin == do_dcngettext /* dcngettext(...) */
- && subn->lnode->type == Node_val /* 1st arg is constant */
- && (subn->lnode->flags & STRCUR) != 0 /* it's a string constant */
- && subn->rnode->lnode->type == Node_val /* 2nd arg is constant too */
- && (subn->rnode->lnode->flags & STRCUR) != 0) { /* it's a string constant */
+ && subn->nexti->lasti->opcode == Op_push_i /* 1st arg is constant */
+ && (subn->nexti->lasti->memory->flags & STRCUR) != 0 /* it's a string constant */
+ && subn->nexti->lasti->nexti->lasti->opcode == Op_push_i /* 2nd arg is constant too */
+ && (subn->nexti->lasti->nexti->lasti->memory->flags & STRCUR) != 0) { /* it's a string constant */
/* ala xgettext, dcngettext("some string", "some plural" ...) dumps the string */
- NODE *str1 = subn->lnode;
- NODE *str2 = subn->rnode->lnode;
+ NODE *str1 = subn->nexti->lasti->memory;
+ NODE *str2 = subn->nexti->lasti->nexti->lasti->memory;
if (((str1->flags | str2->flags) & INTLSTR) != 0)
warning(_("use of dcngettext(_\"...\") is incorrect: remove leading underscore"));
else
dumpintlstr2(str1->stptr, str1->stlen, str2->stptr, str2->stlen);
+ } else if (r->builtin == do_asort || r->builtin == do_asorti) {
+ arg = subn->nexti; /* 1st arg list */
+ ip = arg->lasti;
+ if (/* ip == arg->nexti && */ ip->opcode == Op_push)
+ ip->opcode = Op_push_array;
+ if (nexp == 2) {
+ arg = ip->nexti;
+ ip = arg->lasti;
+ if (/* ip == arg->nexti && */ ip->opcode == Op_push)
+ ip->opcode = Op_push_array;
+ }
}
+#ifdef ARRAYDEBUG
+ else if (r->builtin == do_adump) {
+ ip = subn->nexti->lasti;
+ if (ip->opcode == Op_push)
+ ip->opcode = Op_push_array;
+ }
+#endif
- r->subnode = subn;
- if (r->builtin == do_sprintf) {
- count_args(r);
- if (r->lnode != NULL) /* r->lnode set from subn. guard against syntax errors & check it's valid */
- r->lnode->printf_count = r->printf_count; /* hack */
+ if (subn != NULL) {
+ r->expr_count = count_expressions(&subn, FALSE);
+ return list_append(subn, r);
}
- return r;
+
+ r->expr_count = 0;
+ return list_create(r);
}
-/* make_for_loop --- build a for loop */
+/* append_param --- append PNAME to the list of parameters
+ * for the current function.
+ */
-static NODE *
-make_for_loop(NODE *init, NODE *cond, NODE *incr)
+static void
+append_param(char *pname)
{
- register FOR_LOOP_HEADER *r;
- NODE *n;
+ static NODE *savetail = NULL;
+ NODE *p;
- emalloc(r, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop");
- getnode(n);
- n->type = Node_illegal;
- r->init = init;
- r->cond = cond;
- r->incr = incr;
- n->sub.nodep.r.hd = r;
- return n;
+ p = make_param(pname);
+ if (func_params == NULL) {
+ func_params = p;
+ savetail = p;
+ } else if (savetail != NULL) {
+ savetail->rnode = p;
+ savetail = p;
+ }
}
/* dup_parms --- return TRUE if there are duplicate parameters */
@@ -2648,7 +3749,7 @@ make_for_loop(NODE *init, NODE *cond, NODE *incr)
static int
dup_parms(NODE *func)
{
- register NODE *np;
+ NODE *np;
const char *fname, **names;
int count, i, j, dups;
NODE *params;
@@ -2671,7 +3772,7 @@ dup_parms(NODE *func)
i = 0;
for (np = params; np != NULL; np = np->rnode) {
if (np->param == NULL) { /* error earlier, give up, go home */
- free(names);
+ efree(names);
return TRUE;
}
names[i++] = np->param;
@@ -2684,36 +3785,45 @@ dup_parms(NODE *func)
dups++;
error(
_("function `%s': parameter #%d, `%s', duplicates parameter #%d"),
- fname, i+1, names[j], j+1);
+ fname, i + 1, names[j], j+1);
}
}
}
- free(names);
+ efree(names);
return (dups > 0 ? TRUE : FALSE);
}
/* parms_shadow --- check if parameters shadow globals */
static int
-parms_shadow(const char *fname, NODE *func)
+parms_shadow(INSTRUCTION *pc, int *shadow)
{
- int count, i;
+ int pcount, i;
int ret = FALSE;
+ NODE *func;
+ char *fname;
+ func = pc->func_body;
+ fname = func->lnode->param;
+
+#if 0 /* can't happen, already exited if error ? */
if (fname == NULL || func == NULL) /* error earlier */
return FALSE;
+#endif
- count = func->lnode->param_cnt;
+ pcount = func->lnode->param_cnt;
- if (count == 0) /* no args, no problem */
- return FALSE;
+ if (pcount == 0) /* no args, no problem */
+ return 0;
+ source = pc->source_file;
+ sourceline = pc->source_line;
/*
* Use warning() and not lintwarn() so that can warn
* about all shadowed parameters.
*/
- for (i = 0; i < count; i++) {
+ for (i = 0; i < pcount; i++) {
if (lookup(func->parmlist[i]) != NULL) {
warning(
_("function `%s': parameter `%s' shadows global variable"),
@@ -2722,21 +3832,27 @@ parms_shadow(const char *fname, NODE *func)
}
}
- return ret;
+ *shadow |= ret;
+ return 0;
}
+
/*
- * install:
+ * install_symbol:
* Install a name in the symbol table, even if it is already there.
* Caller must check against redefinition if that is desired.
*/
+
NODE *
-install(char *name, NODE *value)
+install_symbol(char *name, NODE *value)
{
- register NODE *hp;
- register size_t len;
- register int bucket;
+ NODE *hp;
+ size_t len;
+ int bucket;
+
+ if (install_func)
+ (*install_func)(name);
var_count++;
len = strlen(name);
@@ -2752,27 +3868,26 @@ install(char *name, NODE *value)
return hp->hvalue;
}
-/* lookup --- find the most recent hash node for name installed by install */
+/* lookup --- find the most recent hash node for name installed by install_symbol */
NODE *
lookup(const char *name)
{
- register NODE *bucket;
- register size_t len;
+ NODE *bucket;
+ size_t len;
len = strlen(name);
for (bucket = variables[hash(name, len, (unsigned long) HASHSIZE, NULL)];
bucket != NULL; bucket = bucket->hnext)
if (bucket->hlength == len && STREQN(bucket->hname, name, len))
return bucket->hvalue;
-
return NULL;
}
-/* var_comp --- compare two variable names */
+/* sym_comp --- compare two symbol (variable or function) names */
static int
-var_comp(const void *v1, const void *v2)
+sym_comp(const void *v1, const void *v2)
{
const NODE *const *npp1, *const *npp2;
const NODE *n1, *n2;
@@ -2793,77 +3908,90 @@ var_comp(const void *v1, const void *v2)
/* valinfo --- dump var info */
-static void
-valinfo(NODE *n, FILE *fp)
+void
+valinfo(NODE *n, int (*print_func)(FILE *, const char *, ...), FILE *fp)
{
- if (n->flags & STRING) {
- fprintf(fp, "string (");
- pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE);
- fprintf(fp, ")\n");
+ if (n == Nnull_string)
+ print_func(fp, "uninitialized scalar\n");
+ else if (n->flags & STRING) {
+ pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', FALSE);
+ print_func(fp, "\n");
} else if (n->flags & NUMBER)
- fprintf(fp, "number (%.17g)\n", n->numbr);
+ print_func(fp, "%.17g\n", n->numbr);
else if (n->flags & STRCUR) {
- fprintf(fp, "string value (");
- pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE);
- fprintf(fp, ")\n");
+ pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', FALSE);
+ print_func(fp, "\n");
} else if (n->flags & NUMCUR)
- fprintf(fp, "number value (%.17g)\n", n->numbr);
+ print_func(fp, "%.17g\n", n->numbr);
else
- fprintf(fp, "?? flags %s\n", flags2str(n->flags));
+ print_func(fp, "?? flags %s\n", flags2str(n->flags));
}
+/* get_varlist --- list of global variables */
-/* dump_vars --- dump the symbol table */
-
-void
-dump_vars(const char *fname)
+NODE **
+get_varlist()
{
int i, j;
NODE **table;
NODE *p;
- FILE *fp;
-
- emalloc(table, NODE **, var_count * sizeof(NODE *), "dump_vars");
-
- if (fname == NULL)
- fp = stderr;
- else if ((fp = fopen(fname, "w")) == NULL) {
- warning(_("could not open `%s' for writing (%s)"), fname, strerror(errno));
- warning(_("sending profile to standard error"));
- fp = stderr;
- }
+ emalloc(table, NODE **, (var_count + 1) * sizeof(NODE *), "get_varlist");
+ update_global_values();
for (i = j = 0; i < HASHSIZE; i++)
for (p = variables[i]; p != NULL; p = p->hnext)
table[j++] = p;
-
assert(j == var_count);
/* Shazzam! */
- qsort(table, j, sizeof(NODE *), var_comp);
+ qsort(table, j, sizeof(NODE *), sym_comp);
+
+ table[j] = NULL;
+ return table;
+}
- for (i = 0; i < j; i++) {
- p = table[i];
+/* print_vars --- print names and values of global variables */
+
+void
+print_vars(int (*print_func)(FILE *, const char *, ...), FILE *fp)
+{
+ int i;
+ NODE **table;
+ NODE *p;
+
+ table = get_varlist();
+ for (i = 0; (p = table[i]) != NULL; i++) {
if (p->hvalue->type == Node_func)
continue;
- fprintf(fp, "%.*s: ", (int) p->hlength, p->hname);
+ print_func(fp, "%.*s: ", (int) p->hlength, p->hname);
if (p->hvalue->type == Node_var_array)
- fprintf(fp, "array, %ld elements\n", p->hvalue->table_size);
+ print_func(fp, "array, %ld elements\n", p->hvalue->table_size);
else if (p->hvalue->type == Node_var_new)
- fprintf(fp, "unused variable\n");
+ print_func(fp, "untyped variable\n");
else if (p->hvalue->type == Node_var)
- valinfo(p->hvalue->var_value, fp);
- else {
- NODE **lhs = get_lhs(p->hvalue, NULL, FALSE);
+ valinfo(p->hvalue->var_value, print_func, fp);
+ }
+ efree(table);
+}
- valinfo(*lhs, fp);
- }
+/* dump_vars --- dump the symbol table */
+
+void
+dump_vars(const char *fname)
+{
+ FILE *fp;
+
+ if (fname == NULL)
+ fp = stderr;
+ else if ((fp = fopen(fname, "w")) == NULL) {
+ warning(_("could not open `%s' for writing (%s)"), fname, strerror(errno));
+ warning(_("sending profile to standard error"));
+ fp = stderr;
}
+ print_vars(fprintf, fp);
if (fp != stderr && fclose(fp) != 0)
warning(_("%s: close failed (%s)"), fname, strerror(errno));
-
- free(table);
}
/* release_all_vars --- free all variable memory */
@@ -2873,8 +4001,8 @@ release_all_vars()
{
int i;
NODE *p, *next;
-
- for (i = 0; i < HASHSIZE; i++)
+
+ for (i = 0; i < HASHSIZE; i++) {
for (p = variables[i]; p != NULL; p = next) {
next = p->hnext;
@@ -2882,40 +4010,14 @@ release_all_vars()
continue;
else if (p->hvalue->type == Node_var_array)
assoc_clear(p->hvalue);
- else if (p->hvalue->type != Node_var_new) {
- NODE **lhs = get_lhs(p->hvalue, NULL, FALSE);
-
- unref(*lhs);
- }
- unref(p);
- }
-}
-
-/* finfo --- for use in comparison and sorting of function names */
-
-struct finfo {
- const char *name;
- size_t nlen;
- NODE *func;
-};
-
-/* fcompare --- comparison function for qsort */
-
-static int
-fcompare(const void *p1, const void *p2)
-{
- const struct finfo *f1, *f2;
- int minlen;
+ else if (p->hvalue->type != Node_var_new)
+ unref(p->hvalue->var_value);
- f1 = (const struct finfo *) p1;
- f2 = (const struct finfo *) p2;
-
- if (f1->nlen > f2->nlen)
- minlen = f2->nlen;
- else
- minlen = f1->nlen;
-
- return strncmp(f1->name, f2->name, minlen);
+ efree(p->hname);
+ freenode(p->hvalue);
+ freenode(p);
+ }
+ }
}
/* dump_funcs --- print all functions */
@@ -2923,48 +4025,10 @@ fcompare(const void *p1, const void *p2)
void
dump_funcs()
{
- int i, j;
- NODE *p;
- struct finfo *tab = NULL;
-
- /*
- * Walk through symbol table countng functions.
- * Could be more than func_count if there are
- * extension functions.
- */
- for (i = j = 0; i < HASHSIZE; i++) {
- for (p = variables[i]; p != NULL; p = p->hnext) {
- if (p->hvalue->type == Node_func) {
- j++;
- }
- }
- }
-
- if (j == 0)
+ if (func_count <= 0)
return;
- emalloc(tab, struct finfo *, j * sizeof(struct finfo), "dump_funcs");
-
- /* now walk again, copying info */
- for (i = j = 0; i < HASHSIZE; i++) {
- for (p = variables[i]; p != NULL; p = p->hnext) {
- if (p->hvalue->type == Node_func) {
- tab[j].name = p->hname;
- tab[j].nlen = p->hlength;
- tab[j].func = p->hvalue;
- j++;
- }
- }
- }
-
-
- /* Shazzam! */
- qsort(tab, j, sizeof(struct finfo), fcompare);
-
- for (i = 0; i < j; i++)
- pp_func(tab[i].name, tab[i].nlen, tab[i].func);
-
- free(tab);
+ (void) foreach_func((int (*)(INSTRUCTION *, void *)) pp_func, TRUE, (void *) 0);
}
/* shadow_funcs --- check all functions for parameters that shadow globals */
@@ -2972,40 +4036,16 @@ dump_funcs()
void
shadow_funcs()
{
- int i, j;
- NODE *p;
- struct finfo *tab;
static int calls = 0;
int shadow = FALSE;
- if (func_count == 0)
+ if (func_count <= 0)
return;
if (calls++ != 0)
fatal(_("shadow_funcs() called twice!"));
- emalloc(tab, struct finfo *, func_count * sizeof(struct finfo), "shadow_funcs");
-
- for (i = j = 0; i < HASHSIZE; i++) {
- for (p = variables[i]; p != NULL; p = p->hnext) {
- if (p->hvalue->type == Node_func) {
- tab[j].name = p->hname;
- tab[j].nlen = p->hlength;
- tab[j].func = p->hvalue;
- j++;
- }
- }
- }
-
- assert(j == func_count);
-
- /* Shazzam! */
- qsort(tab, func_count, sizeof(struct finfo), fcompare);
-
- for (i = 0; i < j; i++)
- shadow |= parms_shadow(tab[i].name, tab[i].func);
-
- free(tab);
+ (void) foreach_func((int (*)(INSTRUCTION *, void *)) parms_shadow, TRUE, &shadow);
/* End with fatal if the user requested it. */
if (shadow && lintfunc != warning)
@@ -3013,54 +4053,6 @@ shadow_funcs()
}
/*
- * append_right:
- * Add new to the rightmost branch of LIST. This uses n^2 time, so we make
- * a simple attempt at optimizing it.
- */
-
-static NODE *
-append_right(NODE *list, NODE *new)
-{
- register NODE *oldlist;
- static NODE *savefront = NULL, *savetail = NULL;
-
- if (list == NULL || new == NULL)
- return list;
-
- oldlist = list;
- if (savefront == oldlist)
- list = savetail; /* Be careful: maybe list->rnode != NULL */
- else
- savefront = oldlist;
-
- while (list->rnode != NULL)
- list = list->rnode;
- savetail = list->rnode = new;
- return oldlist;
-}
-
-/*
- * append_pattern:
- * A wrapper around append_right, used for rule lists.
- */
-static inline NODE *
-append_pattern(NODE **list, NODE *patt)
-{
- NODE *n = node(patt, Node_rule_node, (NODE *) NULL);
-
- if (*list == NULL)
- *list = n;
- else {
- NODE *n1 = node(n, Node_rule_list, (NODE *) NULL);
- if ((*list)->type != Node_rule_list)
- *list = node(*list, Node_rule_list, n1);
- else
- (void) append_right(*list, n1);
- }
- return n;
-}
-
-/*
* func_install:
* check if name is already installed; if so, it had better have Null value,
* in which case def is added as the value. Otherwise, install name with def
@@ -3071,92 +4063,117 @@ append_pattern(NODE **list, NODE *patt)
* of each function parameter during a function call. See eval.c.
*/
-static void
-func_install(NODE *params, NODE *def)
+static int
+func_install(INSTRUCTION *func, INSTRUCTION *def)
{
- NODE *r, *n, *thisfunc;
- char **pnames, *names, *sp;
- size_t pcount = 0, space = 0;
+ NODE *params;
+ NODE *r, *n, *thisfunc, *hp;
+ char **pnames = NULL;
+ char *fname;
+ int pcount = 0;
int i;
+ params = func_params;
+
/* check for function foo(foo) { ... }. bleah. */
for (n = params->rnode; n != NULL; n = n->rnode) {
- if (strcmp(n->param, params->param) == 0)
- fatal(_("function `%s': can't use function name as parameter name"),
- params->param);
- else if (is_std_var(n->param))
- fatal(_("function `%s': can't use special variable `%s' as a function parameter"),
+ if (strcmp(n->param, params->param) == 0) {
+ error(_("function `%s': can't use function name as parameter name"),
+ params->param);
+ errcount++;
+ return -1;
+ } else if (is_std_var(n->param)) {
+ error(_("function `%s': can't use special variable `%s' as a function parameter"),
params->param, n->param);
+ errcount++;
+ return -1;
+ }
}
- thisfunc = NULL; /* turn off warnings */
+ thisfunc = NULL; /* turn off warnings */
- /* symbol table managment */
- pop_var(params, FALSE);
- r = lookup(params->param);
+ fname = params->param;
+ /* symbol table management */
+ hp = remove_symbol(params->param); /* remove function name out of symbol table */
+ if (hp != NULL)
+ freenode(hp);
+ r = lookup(fname);
if (r != NULL) {
- fatal(_("function name `%s' previously defined"), params->param);
- } else if (params->param == builtin_func) /* not a valid function name */
+ error(_("function name `%s' previously defined"), fname);
+ errcount++;
+ return -1;
+ } else if (fname == builtin_func) /* not a valid function name */
goto remove_params;
+ /* add an implicit return at end;
+ * also used by 'return' command in debugger
+ */
+
+ (void) list_append(def, instruction(Op_push_i));
+ def->lasti->memory = Nnull_string;
+ (void) list_append(def, instruction(Op_K_return));
+
+ if (do_profiling)
+ (void) list_prepend(def, instruction(Op_exec_count));
+
+ /* func->opcode is Op_func */
+ (func + 1)->firsti = def->nexti;
+ (func + 1)->lasti = def->lasti;
+ (func + 2)->first_line = func->source_line;
+ (func + 2)->last_line = lastline;
+
+ func->nexti = def->nexti;
+ bcfree(def);
+
+ (void) list_append(rule_list, func + 1); /* debugging */
+
/* install the function */
- thisfunc = node(params, Node_func, def);
- (void) install(params->param, thisfunc);
+ thisfunc = mk_symbol(Node_func, params);
+ (void) install_symbol(fname, thisfunc);
+ thisfunc->code_ptr = func;
+ func->func_body = thisfunc;
- /* figure out amount of space to allocate for variable names */
- for (n = params->rnode; n != NULL; n = n->rnode) {
+ for (n = params->rnode; n != NULL; n = n->rnode)
pcount++;
- space += strlen(n->param) + 1;
- }
- /* allocate it and fill it in */
if (pcount != 0) {
- emalloc(names, char *, space, "func_install");
- emalloc(pnames, char **, pcount * sizeof(char *), "func_install");
- sp = names;
- for (i = 0, n = params->rnode; i < pcount; i++, n = n->rnode) {
- pnames[i] = sp;
- strcpy(sp, n->param);
- sp += strlen(n->param) + 1;
- }
- thisfunc->parmlist = pnames;
- } else {
- thisfunc->parmlist = NULL;
+ emalloc(pnames, char **, (pcount + 1) * sizeof(char *), "func_install");
+ for (i = 0, n = params->rnode; i < pcount; i++, n = n->rnode)
+ pnames[i] = n->param;
+ pnames[pcount] = NULL;
}
+ thisfunc->parmlist = pnames;
/* update lint table info */
- func_use(params->param, FUNC_DEFINE);
+ func_use(fname, FUNC_DEFINE);
- func_count++; /* used by profiling / pretty printer */
+ func_count++; /* used in profiler / pretty printer */
remove_params:
/* remove params from symbol table */
pop_params(params->rnode);
+ return 0;
}
-/* pop_var --- remove a variable from the symbol table */
+/* remove_symbol --- remove a variable from the symbol table */
-static void
-pop_var(NODE *np, int freeit)
+NODE *
+remove_symbol(char *name)
{
- register NODE *bucket, **save;
- register size_t len;
- char *name;
+ NODE *bucket, **save;
+ size_t len;
- name = np->param;
len = strlen(name);
save = &(variables[hash(name, len, (unsigned long) HASHSIZE, NULL)]);
for (bucket = *save; bucket != NULL; bucket = bucket->hnext) {
if (len == bucket->hlength && STREQN(bucket->hname, name, len)) {
var_count--;
*save = bucket->hnext;
- freenode(bucket);
- if (freeit)
- free(np->param);
- return;
+ return bucket;
}
save = &(bucket->hnext);
}
+ return NULL;
}
/* pop_params --- remove list of function parameters from symbol table */
@@ -3168,10 +4185,13 @@ pop_var(NODE *np, int freeit)
static void
pop_params(NODE *params)
{
+ NODE *hp;
if (params == NULL)
return;
pop_params(params->rnode);
- pop_var(params, TRUE);
+ hp = remove_symbol(params->param);
+ if (hp != NULL)
+ freenode(hp);
}
/* make_param --- make NAME into a function parameter */
@@ -3184,9 +4204,8 @@ make_param(char *name)
getnode(r);
r->type = Node_param_list;
r->rnode = NULL;
- r->param = name;
r->param_cnt = param_counter++;
- return (install(name, r));
+ return (install_symbol(name, r));
}
static struct fdesc {
@@ -3240,6 +4259,9 @@ check_funcs()
struct fdesc *fp, *next;
int i;
+ if (get_context()->level > 0)
+ goto free_mem;
+
for (i = 0; i < HASHSIZE; i++) {
for (fp = ftable[i]; fp != NULL; fp = fp->next) {
#ifdef REALLYMEAN
@@ -3261,29 +4283,98 @@ check_funcs()
}
}
+free_mem:
/* now let's free all the memory */
for (i = 0; i < HASHSIZE; i++) {
for (fp = ftable[i]; fp != NULL; fp = next) {
next = fp->next;
- free(fp->name);
- free(fp);
+ efree(fp->name);
+ efree(fp);
}
+ ftable[i] = NULL;
}
}
/* param_sanity --- look for parameters that are regexp constants */
static void
-param_sanity(NODE *arglist)
+param_sanity(INSTRUCTION *arglist)
{
- NODE *argp, *arg;
- int i;
+ INSTRUCTION *argl, *arg;
+ int i = 1;
- for (i = 1, argp = arglist; argp != NULL; argp = argp->rnode, i++) {
- arg = argp->lnode;
- if (arg->type == Node_regex)
+ if (arglist == NULL)
+ return;
+ for (argl = arglist->nexti; argl; ) {
+ arg = argl->lasti;
+ if (arg->opcode == Op_match_rec)
warning(_("regexp constant for parameter #%d yields boolean value"), i);
+ argl = arg->nexti;
+ i++;
+ }
+}
+
+/* foreach_func --- execute given function for each awk function in symbol table. */
+
+int
+foreach_func(int (*pfunc)(INSTRUCTION *, void *), int sort, void *data)
+{
+ int i, j;
+ NODE *p;
+ int ret = 0;
+
+ if (sort) {
+ NODE **tab;
+
+ /*
+ * Walk through symbol table counting functions.
+ * Could be more than func_count if there are
+ * extension functions.
+ */
+ for (i = j = 0; i < HASHSIZE; i++) {
+ for (p = variables[i]; p != NULL; p = p->hnext) {
+ if (p->hvalue->type == Node_func) {
+ j++;
+ }
+ }
+ }
+
+ if (j == 0)
+ return 0;
+
+ emalloc(tab, NODE **, j * sizeof(NODE *), "foreach_func");
+
+ /* now walk again, copying info */
+ for (i = j = 0; i < HASHSIZE; i++) {
+ for (p = variables[i]; p != NULL; p = p->hnext) {
+ if (p->hvalue->type == Node_func) {
+ tab[j] = p;
+ j++;
+ }
+ }
+ }
+
+ /* Shazzam! */
+ qsort(tab, j, sizeof(NODE *), sym_comp);
+
+ for (i = 0; i < j; i++) {
+ if ((ret = pfunc(tab[i]->hvalue->code_ptr, data)) != 0)
+ break;
+ }
+
+ efree(tab);
+ return ret;
+ }
+
+ /* unsorted */
+ for (i = 0; i < HASHSIZE; i++) {
+ for (p = variables[i]; p != NULL; p = p->hnext) {
+ if (p->hvalue->type == Node_func
+ && (ret = pfunc(p->hvalue->code_ptr, data)) != 0)
+ return ret;
+ }
}
+ return 0;
}
/* deferred variables --- those that are only defined if needed. */
@@ -3320,33 +4411,31 @@ register_deferred_variable(const char *name, NODE *(*load_func)(void))
/* variable --- make sure NAME is in the symbol table */
NODE *
-variable(char *name, int can_free, NODETYPE type)
+variable(char *name, NODETYPE type)
{
- register NODE *r;
+ NODE *r;
if ((r = lookup(name)) != NULL) {
- if (r->type == Node_func)
- fatal(_("function `%s' called with space between name and `(',\nor used as a variable or an array"),
+ if (r->type == Node_func) {
+ error(_("function `%s' called with space between name and `(',\nor used as a variable or an array"),
r->vname);
-
+ errcount++;
+ r->type = Node_var_new; /* continue parsing instead of exiting */
+ }
} else {
/* not found */
struct deferred_variable *dv;
for (dv = deferred_variables; TRUE; dv = dv->next) {
if (dv == NULL) {
- /*
- * This is the only case in which we may not
- * free the string.
- */
- NODE *n;
-
- if (type == Node_var_array)
- n = node(NULL, type, NULL);
+ /*
+ * This is the only case in which we may not free the string.
+ */
+ if (type == Node_var)
+ r = mk_symbol(type, Nnull_string);
else
- n = node(Nnull_string, type, NULL);
-
- return install(name, n);
+ r = mk_symbol(type, (NODE *) NULL);
+ return install_symbol(name, r);
}
if (STREQ(name, dv->name)) {
r = (*dv->load_func)();
@@ -3354,31 +4443,55 @@ variable(char *name, int can_free, NODETYPE type)
}
}
}
- if (can_free)
- free(name);
+ efree(name);
return r;
}
-/* mk_rexp --- make a regular expression constant */
+/* make_regnode --- make a regular expression node */
+/*
+ * Returns a zero-filled node of the given type with re_cnt set to 1.
+ * For Node_regex the text in exp is compiled immediately; if compilation
+ * fails the node is released and NULL is returned. For any other type
+ * exp is not touched.
+ */
static NODE *
-mk_rexp(NODE *exp)
+make_regnode(int type, NODE *exp)
{
NODE *n;
- if (exp->type == Node_regex)
- return exp;
-
getnode(n);
- n->type = Node_dynregex;
- n->re_exp = exp;
- n->re_text = NULL;
- n->re_reg = NULL;
- n->re_flags = 0;
+ memset(n, 0, sizeof(NODE));
+ n->type = type;
n->re_cnt = 1;
+
+ if (type == Node_regex) {
+ /* constant regexp: compile now from the string value of exp */
+ n->re_reg = make_regexp(exp->stptr, exp->stlen, FALSE, TRUE, FALSE);
+ if (n->re_reg == NULL) {
+ freenode(n);
+ return NULL;
+ }
+ n->re_exp = exp;
+ n->re_flags = CONSTANT;
+ }
return n;
}
+
+/* mk_rexp --- make a regular expression constant */
+
+/*
+ * A list consisting of a single Op_match_rec is turned into Op_push_re in
+ * place. Any other list gets a fresh Op_push_re, carrying a new dynamic
+ * regexp node, linked in after its last instruction. The regexp node of
+ * the chosen instruction is returned.
+ */
+
+static NODE *
+mk_rexp(INSTRUCTION *list)
+{
+	INSTRUCTION *first = list->nexti;
+	INSTRUCTION *re;
+
+	if (first == list->lasti && first->opcode == Op_match_rec) {
+		/* lone regexp constant: reuse the instruction */
+		first->opcode = Op_push_re;
+		return first->memory;
+	}
+
+	re = instruction(Op_push_re);
+	re->memory = make_regnode(Node_dynregex, NULL);
+	/* splice the new instruction in as the list's last one */
+	re->nexti = list->lasti->nexti;
+	list->lasti->nexti = re;
+	list->lasti = re;
+	return re->memory;
+}
+
/* isnoeffect --- when used as a statement, has no side effects */
/*
@@ -3389,48 +4502,38 @@ mk_rexp(NODE *exp)
*/
static int
-isnoeffect(NODETYPE type)
+isnoeffect(OPCODE type)
{
switch (type) {
- case Node_times:
- case Node_quotient:
- case Node_mod:
- case Node_plus:
- case Node_minus:
- case Node_subscript:
- case Node_concat:
- case Node_exp:
- case Node_unary_minus:
- case Node_field_spec:
- case Node_and:
- case Node_or:
- case Node_equal:
- case Node_notequal:
- case Node_less:
- case Node_greater:
- case Node_leq:
- case Node_geq:
- case Node_match:
- case Node_nomatch:
- case Node_not:
- case Node_val:
- case Node_in_array:
- case Node_NF:
- case Node_NR:
- case Node_FNR:
- case Node_FPAT:
- case Node_FS:
- case Node_RS:
- case Node_FIELDWIDTHS:
- case Node_IGNORECASE:
- case Node_OFS:
- case Node_ORS:
- case Node_OFMT:
- case Node_CONVFMT:
- case Node_BINMODE:
- case Node_LINT:
- case Node_SUBSEP:
- case Node_TEXTDOMAIN:
+ case Op_times:
+ case Op_times_i:
+ case Op_quotient:
+ case Op_quotient_i:
+ case Op_mod:
+ case Op_mod_i:
+ case Op_plus:
+ case Op_plus_i:
+ case Op_minus:
+ case Op_minus_i:
+ case Op_subscript:
+ case Op_concat:
+ case Op_exp:
+ case Op_exp_i:
+ case Op_unary_minus:
+ case Op_field_spec:
+ case Op_and_final:
+ case Op_or_final:
+ case Op_equal:
+ case Op_notequal:
+ case Op_less:
+ case Op_greater:
+ case Op_leq:
+ case Op_geq:
+ case Op_match:
+ case Op_nomatch:
+ case Op_match_rec:
+ case Op_not:
+ case Op_in_array:
return TRUE;
default:
break; /* keeps gcc -Wall happy */
@@ -3439,45 +4542,34 @@ isnoeffect(NODETYPE type)
return FALSE;
}
-/* isassignable --- can this node be assigned to? */
+/* make_assignable --- make this operand an assignable one if possible */
-static int
-isassignable(register NODE *n)
+static INSTRUCTION *
+make_assignable(INSTRUCTION *ip)
{
- switch (n->type) {
- case Node_var_new:
- case Node_var:
- case Node_FIELDWIDTHS:
- case Node_RS:
- case Node_FS:
- case Node_FNR:
- case Node_FPAT:
- case Node_NR:
- case Node_NF:
- case Node_IGNORECASE:
- case Node_OFMT:
- case Node_CONVFMT:
- case Node_ORS:
- case Node_OFS:
- case Node_LINT:
- case Node_BINMODE:
- case Node_SUBSEP:
- case Node_TEXTDOMAIN:
- case Node_field_spec:
- case Node_subscript:
- return TRUE;
- case Node_param_list:
- return ((n->flags & FUNC) == 0); /* ok if not func name */
+ switch (ip->opcode) {
+ case Op_push:
+ if (ip->memory->type == Node_param_list
+ && (ip->memory->flags & FUNC) != 0)
+ return NULL;
+ ip->opcode = Op_push_lhs;
+ return ip;
+ case Op_field_spec:
+ ip->opcode = Op_field_spec_lhs;
+ return ip;
+ case Op_subscript:
+ ip->opcode = Op_subscript_lhs;
+ return ip;
default:
break; /* keeps gcc -Wall happy */
}
- return FALSE;
+ return NULL;
}
/* stopme --- for debugging */
NODE *
-stopme(NODE *tree ATTRIBUTE_UNUSED)
+stopme(int nargs ATTRIBUTE_UNUSED)
{
return (NODE *) 0;
}
@@ -3499,7 +4591,7 @@ dumpintlstr(const char *str, size_t len)
}
printf("msgid ");
- pp_string_fp(stdout, str, len, '"', TRUE);
+ pp_string_fp(fprintf, stdout, str, len, '"', TRUE);
putchar('\n');
printf("msgstr \"\"\n\n");
fflush(stdout);
@@ -3522,35 +4614,15 @@ dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2)
}
printf("msgid ");
- pp_string_fp(stdout, str1, len1, '"', TRUE);
+ pp_string_fp(fprintf, stdout, str1, len1, '"', TRUE);
putchar('\n');
printf("msgid_plural ");
- pp_string_fp(stdout, str2, len2, '"', TRUE);
+ pp_string_fp(fprintf, stdout, str2, len2, '"', TRUE);
putchar('\n');
printf("msgstr[0] \"\"\nmsgstr[1] \"\"\n\n");
fflush(stdout);
}
-/* count_args --- count the number of printf arguments */
-
-static void
-count_args(NODE *tree)
-{
- size_t count = 0;
- NODE *save_tree;
-
- assert(tree->type == Node_K_printf
- || (tree->type == Node_builtin && tree->builtin == do_sprintf));
- save_tree = tree;
-
- tree = tree->lnode; /* printf format string */
-
- for (count = 0; tree != NULL; tree = tree->rnode)
- count++;
-
- save_tree->printf_count = count;
-}
-
/* isarray --- can this type be subscripted? */
static int
@@ -3572,6 +4644,1194 @@ isarray(NODE *n)
return FALSE;
}
+
+/*
+ * mk_binary --- instructions for a binary arithmetic operation.
+ *
+ * s1 and s2 are the operand lists and op is the operator instruction.
+ * When s2 is a single Op_push_i:
+ * - if s1 is also a single instruction whose value is a non-string
+ * Node_val and do_optimize > 1, the result is computed here and op
+ * becomes an Op_push_i of that result;
+ * - otherwise op is converted to its `_i' variant, which carries the
+ * constant itself, and is appended to s1.
+ * In every other case s1 and s2 are merged and op is appended.
+ */
+static INSTRUCTION *
+mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op)
+{
+ INSTRUCTION *ip,*ip1;
+ AWKNUM res;
+
+ ip = s2->nexti;
+ if (s2->lasti == ip && ip->opcode == Op_push_i) {
+ /* do any numeric constant folding */
+ ip1 = s1->nexti;
+ if (ip1->memory != NULL && ip1->memory->type == Node_val
+ && (ip1->memory->flags & (STRCUR|STRING)) == 0
+ && ip->memory != NULL && ip->memory->type == Node_val
+ && (ip->memory->flags & (STRCUR|STRING)) == 0
+ && ip1 == s1->lasti && do_optimize > 1) {
+ ip1->memory->numbr = force_number(ip1->memory);
+ ip->memory->numbr = force_number(ip->memory);
+ res = ip1->memory->numbr;
+ switch (op->opcode) {
+ case Op_times:
+ res *= ip->memory->numbr;
+ break;
+ case Op_quotient:
+ if (ip->memory->numbr == 0) {
+ /* don't fatalize, allow parsing rest of the input */
+ yyerror(_("division by zero attempted"));
+ goto regular;
+ }
+
+ res /= ip->memory->numbr;
+ break;
+ case Op_mod:
+ if (ip->memory->numbr == 0) {
+ /* don't fatalize, allow parsing rest of the input */
+ yyerror(_("division by zero attempted in `%%'"));
+ goto regular;
+ }
+#ifdef HAVE_FMOD
+ res = fmod(res, ip->memory->numbr);
+#else /* ! HAVE_FMOD */
+ (void) modf(res / ip->memory->numbr, &res);
+ res = ip1->memory->numbr - res * ip->memory->numbr;
+#endif /* ! HAVE_FMOD */
+ break;
+ case Op_plus:
+ res += ip->memory->numbr;
+ break;
+ case Op_minus:
+ res -= ip->memory->numbr;
+ break;
+ case Op_exp:
+ res = calc_exp(res, ip->memory->numbr);
+ break;
+ default:
+ goto regular;
+ }
+
+ /* reuse op as the push of the folded constant; operands are released */
+ op->opcode = Op_push_i;
+ op->memory = mk_number(res, (PERM|NUMCUR|NUMBER));
+ bcfree(ip1);
+ bcfree(ip);
+ bcfree(s1);
+ bcfree(s2);
+ return list_create(op);
+ } else {
+ /* do basic arithmetic optimisation */
+ /* convert (Op_push_i Node_val) + (Op_plus) to (Op_plus_i Node_val) */
+ switch (op->opcode) {
+ case Op_times:
+ op->opcode = Op_times_i;
+ break;
+ case Op_quotient:
+ op->opcode = Op_quotient_i;
+ break;
+ case Op_mod:
+ op->opcode = Op_mod_i;
+ break;
+ case Op_plus:
+ op->opcode = Op_plus_i;
+ break;
+ case Op_minus:
+ op->opcode = Op_minus_i;
+ break;
+ case Op_exp:
+ op->opcode = Op_exp_i;
+ break;
+ default:
+ goto regular;
+ }
+
+ /* the constant moves into op, so the push and its list head go away */
+ op->memory = ip->memory;
+ bcfree(ip);
+ bcfree(s2); /* Op_list */
+ return list_append(s1, op);
+ }
+ }
+
+regular:
+ /* append lists s1, s2 and add `op' bytecode */
+ (void) list_merge(s1, s2);
+ return list_append(s1, op);
+}
+
+/* mk_boolean --- instructions for boolean and, or */
+
+/*
+ * left and right are the operand lists; op is the Op_and / Op_or
+ * instruction. The merged list is returned. The chain of operators in
+ * an expression such as x || y || z is linked through target_stmt so
+ * that all of them can be re-pointed at the newest final instruction.
+ */
+
+static INSTRUCTION *
+mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op)
+{
+ INSTRUCTION *tp;
+ OPCODE opc, final_opc;
+
+ opc = op->opcode; /* Op_and or Op_or */
+ final_opc = (opc == Op_or) ? Op_or_final : Op_and_final;
+
+ add_lint(right, LINT_assign_in_cond);
+
+ tp = left->lasti;
+
+ if (tp->opcode != final_opc) { /* x || y */
+ list_append(right, instruction(final_opc));
+ add_lint(left, LINT_assign_in_cond);
+ (void) list_append(left, op);
+ /* op jumps to the final instruction at the end of right */
+ left->lasti->target_jmp = right->lasti;
+
+ /* NB: target_stmt points to previous Op_and(Op_or) in a chain;
+ * target_stmt only used in the parser (see below).
+ */
+
+ /* first operator of a chain points at itself, which ends the walk below */
+ left->lasti->target_stmt = left->lasti;
+ right->lasti->target_stmt = left->lasti;
+ } else { /* optimization for x || y || z || ... */
+ INSTRUCTION *ip;
+
+ /* op becomes the new final; the old final (tp) becomes a plain operator */
+ op->opcode = final_opc;
+ (void) list_append(right, op);
+ op->target_stmt = tp;
+ tp->opcode = opc;
+ tp->target_jmp = op;
+
+ /* update jump targets */
+ for (ip = tp->target_stmt; ; ip = ip->target_stmt) {
+ assert(ip->opcode == opc);
+ assert(ip->target_jmp == tp);
+ /* if (ip->opcode == opc && ip->target_jmp == tp) */
+ ip->target_jmp = op;
+ if (ip->target_stmt == ip)
+ break;
+ }
+ }
+
+ return list_merge(left, right);
+}
+
+/* mk_condition --- if-else and conditional */
+
+/*
+ * cond is the condition list, ifp the instruction placed in front of it,
+ * true_branch and false_branch the (possibly NULL) branch lists, and
+ * elsep the instruction placed in front of the false branch (may be NULL
+ * when false_branch is NULL). Returns the combined list.
+ */
+
+static INSTRUCTION *
+mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch,
+ INSTRUCTION *elsep, INSTRUCTION *false_branch)
+{
+ /*
+ * ----------------
+ * cond
+ * ----------------
+ * t: [Op_jmp_false f ]
+ * ----------------
+ * true_branch
+ *
+ * ----------------
+ * [Op_jmp y]
+ * ----------------
+ * f:
+ * false_branch
+ * ----------------
+ * y: [Op_no_op]
+ * ----------------
+ */
+
+ INSTRUCTION *ip;
+
+ /* FIXME else { } -- add elsep */
+
+ if (false_branch == NULL) {
+ if (elsep != NULL) /* else { } */
+ false_branch = list_append(list_create(elsep), instruction(Op_no_op));
+ else
+ false_branch = list_create(instruction(Op_no_op));
+ } else {
+ /* assert(elsep != NULL); */
+
+ /* avoid a series of no_op's: if .. else if .. else if .. */
+ if (false_branch->lasti->opcode != Op_no_op)
+ (void) list_append(false_branch, instruction(Op_no_op));
+ (void) list_prepend(false_branch, elsep);
+ false_branch->nexti->branch_end = false_branch->lasti;
+ if (do_profiling)
+ (void) list_prepend(false_branch, instruction(Op_exec_count));
+ }
+
+ /* the Op_jmp ending the true branch skips to the trailing Op_no_op (y) */
+ (void) list_prepend(false_branch, instruction(Op_jmp));
+ false_branch->nexti->target_jmp = false_branch->lasti;
+
+ add_lint(cond, LINT_assign_in_cond);
+ ip = list_append(cond, instruction(Op_jmp_false));
+ /* a false condition lands just past that Op_jmp (f) */
+ ip->lasti->target_jmp = false_branch->nexti->nexti;
+
+ (void) list_prepend(ip, ifp);
+ if (do_profiling) {
+ (void) list_append(ip, instruction(Op_exec_count));
+ ip->nexti->branch_if = ip->lasti;
+ ip->nexti->branch_else = false_branch->nexti;
+ }
+
+ if (true_branch != NULL)
+ list_merge(ip, true_branch);
+ return list_merge(ip, false_branch);
+}
+
+enum defline { FIRST_LINE, LAST_LINE };
+
+/* find_line -- find the first(last) line in a list of (pattern) instructions */
+
+/*
+ * Scans from pattern->nexti through pattern->lasti. For LAST_LINE the
+ * largest source_line is returned; for FIRST_LINE the smallest positive
+ * one. A positive line number must be found.
+ */
+
+static int
+find_line(INSTRUCTION *pattern, enum defline what)
+{
+	INSTRUCTION *cur = pattern->nexti;
+	int lineno = 0;
+
+	while (cur != NULL) {
+		int here = cur->source_line;
+
+		if (what == LAST_LINE) {
+			if (here > lineno)
+				lineno = here;
+		} else if (here > 0 && (lineno == 0 || here < lineno)) {
+			/* FIRST_LINE: instructions without a line number are ignored */
+			lineno = here;
+		}
+
+		if (cur == pattern->lasti)
+			break;
+		cur = cur->nexti;
+	}
+	assert(lineno > 0);
+	return lineno;
+}
+
+/* append_rule --- pattern-action instructions */
+
+/*
+ * Builds the instruction list for one rule and adds it to
+ * rule_block[rule]; the accumulated rule_block[rule] is returned.
+ * When the current rule is not an ordinary Rule, pattern itself is used
+ * as the rule instruction (presumably one made by the grammar for a
+ * special rule -- confirm against the caller); otherwise a 3-slot
+ * Op_rule instruction is allocated here.
+ */
+
+static INSTRUCTION *
+append_rule(INSTRUCTION *pattern, INSTRUCTION *action)
+{
+ /*
+ * ----------------
+ * pattern
+ * ----------------
+ * [Op_jmp_false f ]
+ * ----------------
+ * action
+ * ----------------
+ * f: [Op_no_op ]
+ * ----------------
+ */
+
+ INSTRUCTION *rp;
+ INSTRUCTION *tp;
+ INSTRUCTION *ip;
+
+ if (rule != Rule) {
+ rp = pattern;
+ if (do_profiling)
+ (void) list_append(action, instruction(Op_no_op));
+ /* slot 1 holds the action's extent, slot 2 the source line range */
+ (rp + 1)->firsti = action->nexti;
+ (rp + 1)->lasti = action->lasti;
+ (rp + 2)->first_line = pattern->source_line;
+ (rp + 2)->last_line = lastline;
+ ip = list_prepend(action, rp);
+
+ } else {
+ rp = bcalloc(Op_rule, 3, 0);
+ rp->in_rule = Rule;
+ rp->source_file = source;
+ /* tp is the Op_no_op that terminates the rule (label f above) */
+ tp = instruction(Op_no_op);
+
+ if (pattern == NULL) {
+ /* assert(action != NULL); */
+ if (do_profiling)
+ (void) list_prepend(action, instruction(Op_exec_count));
+ (rp + 1)->firsti = action->nexti;
+ (rp + 1)->lasti = tp;
+ (rp + 2)->first_line = firstline;
+ (rp + 2)->last_line = lastline;
+ rp->source_line = firstline;
+ ip = list_prepend(list_append(action, tp), rp);
+ } else {
+ (void) list_append(pattern, instruction(Op_jmp_false));
+ pattern->lasti->target_jmp = tp;
+ (rp + 2)->first_line = find_line(pattern, FIRST_LINE);
+ rp->source_line = (rp + 2)->first_line;
+ if (action == NULL) {
+ /* pattern without action: an Op_K_print_rec action is supplied */
+ (rp + 2)->last_line = find_line(pattern, LAST_LINE);
+ action = list_create(instruction(Op_K_print_rec));
+ if (do_profiling)
+ (void) list_prepend(action, instruction(Op_exec_count));
+ } else
+ (rp + 2)->last_line = lastline;
+
+ if (do_profiling) {
+ (void) list_prepend(pattern, instruction(Op_exec_count));
+ (void) list_prepend(action, instruction(Op_exec_count));
+ }
+ (rp + 1)->firsti = action->nexti;
+ (rp + 1)->lasti = tp;
+ ip = list_append(
+ list_merge(list_prepend(pattern, rp),
+ action),
+ tp);
+ }
+
+ }
+
+ list_append(rule_list, rp + 1);
+
+ if (rule_block[rule] == NULL)
+ rule_block[rule] = ip;
+ else
+ (void) list_merge(rule_block[rule], ip);
+
+ return rule_block[rule];
+}
+
+/* mk_assignment --- assignment bytecodes */
+
+/*
+ * lhs is the target's instruction list, rhs the value's list (may be
+ * NULL), op the assignment instruction. The last instruction of lhs is
+ * converted to its l-value form; the result is rhs, then lhs, then op,
+ * optionally followed by Op_var_assign or Op_field_assign.
+ */
+
+static INSTRUCTION *
+mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op)
+{
+ INSTRUCTION *tp;
+ INSTRUCTION *ip;
+
+ tp = lhs->lasti;
+ switch (tp->opcode) {
+ case Op_field_spec:
+ tp->opcode = Op_field_spec_lhs;
+ break;
+ case Op_subscript:
+ tp->opcode = Op_subscript_lhs;
+ break;
+ case Op_push:
+ case Op_push_array:
+ tp->opcode = Op_push_lhs;
+ break;
+ default:
+ cant_happen();
+ }
+
+ tp->do_reference = (op->opcode != Op_assign); /* check for uninitialized reference */
+
+ /* the value is evaluated before the target is pushed */
+ if (rhs != NULL)
+ ip = list_merge(rhs, lhs);
+ else
+ ip = lhs;
+
+ (void) list_append(ip, op);
+
+ if (tp->opcode == Op_push_lhs
+ && tp->memory->type == Node_var
+ && tp->memory->var_assign
+ ) {
+ tp->do_reference = FALSE; /* no uninitialized reference checking
+ * for a special variable.
+ */
+ (void) list_append(ip, instruction(Op_var_assign));
+ ip->lasti->memory = tp->memory;
+ } else if (tp->opcode == Op_field_spec_lhs) {
+ (void) list_append(ip, instruction(Op_field_assign));
+ ip->lasti->field_assign = (Func_ptr) 0;
+ /* the l-value instruction keeps a pointer to its Op_field_assign */
+ tp->target_assign = ip->lasti;
+ }
+
+ return ip;
+}
+
+/* optimize_assignment --- peephole optimization for assignment */
+
+/*
+ * exp is the instruction list of an assignment used as a statement.
+ * Returns exp, either rewritten into one of the optimized forms below or
+ * with an Op_pop appended when no optimization applies.
+ */
+
+static INSTRUCTION *
+optimize_assignment(INSTRUCTION *exp)
+{
+ INSTRUCTION *i1;
+ INSTRUCTION *i2;
+ INSTRUCTION *i3;
+
+ /*
+ * Optimize assignment statements array[subs] = x; var = x; $n = x;
+ * string concatenation of the form s = s t.
+ *
+ * 1) Array element assignment array[subs] = x:
+ * Replaces Op_push_array + Op_subscript_lhs + Op_assign + Op_pop
+ * with single instruction Op_store_sub.
+ * Limitation (FIXME): 1 dimension and sub is simple var/value.
+ *
+ * 2) Simple variable assignment var = x:
+ * Replaces Op_push_lhs + Op_assign + Op_pop with Op_store_var.
+ *
+ * 3) Field assignment $n = x:
+ * Replaces Op_field_spec_lhs + Op_assign + Op_field_assign + Op_pop
+ * with Op_store_field.
+ *
+ * 4) Optimization for string concatenation:
+ * For cases like x = x y, uses realloc to include y in x;
+ * also eliminates instructions Op_push_lhs and Op_pop.
+ */
+
+ /*
+ * N.B.: do not append Op_pop instruction to the returned
+ * instruction list if optimized. None of these
+ * optimized instructions push the r-value of assignment
+ * onto the runtime stack.
+ */
+
+ i2 = NULL;
+ i1 = exp->lasti;
+
+ /* only lists ending in Op_assign or Op_field_assign are candidates */
+ if ( ! do_optimize
+ || ( i1->opcode != Op_assign
+ && i1->opcode != Op_field_assign)
+ )
+ return list_append(exp, instruction(Op_pop));
+
+ for (i2 = exp->nexti; i2 != i1; i2 = i2->nexti) {
+ switch (i2->opcode) {
+ case Op_concat:
+ if (i2->nexti->opcode == Op_push_lhs /* l.h.s is a simple variable */
+ && (i2->concat_flag & CSVAR) /* 1st exp in r.h.s is a simple variable;
+ * see Op_concat in the grammar above.
+ */
+ && i2->nexti->memory == exp->nexti->memory /* and the same as in l.h.s */
+ && i2->nexti->nexti == i1
+ && i1->opcode == Op_assign
+ ) {
+ /* s = s ... optimization */
+
+ /* avoid stuff like x = x (x = y) or x = x gsub(/./, "b", x);
+ * check for l-value reference to this variable in the r.h.s.
+ * Also avoid function calls in general, to guard against
+ * global variable assignment.
+ */
+
+ for (i3 = exp->nexti->nexti; i3 != i2; i3 = i3->nexti) {
+ if ((i3->opcode == Op_push_lhs && i3->memory == i2->nexti->memory)
+ || i3->opcode == Op_func_call)
+ return list_append(exp, instruction(Op_pop)); /* no optimization */
+ }
+
+ /* remove the variable from r.h.s */
+ i3 = exp->nexti;
+ exp->nexti = i3->nexti;
+ bcfree(i3);
+
+ if (--i2->expr_count == 1) /* one less expression in Op_concat */
+ i2->opcode = Op_no_op;
+
+ i3 = i2->nexti;
+ assert(i3->opcode == Op_push_lhs);
+ i3->opcode = Op_assign_concat; /* change Op_push_lhs to Op_assign_concat */
+ i3->nexti = NULL;
+ bcfree(i1); /* Op_assign */
+ exp->lasti = i3; /* update Op_list */
+ return exp;
+ }
+ break;
+
+ case Op_field_spec_lhs:
+ if (i2->nexti->opcode == Op_assign
+ && i2->nexti->nexti == i1
+ && i1->opcode == Op_field_assign
+ ) {
+ /* $n = .. */
+ i2->opcode = Op_store_field;
+ bcfree(i2->nexti); /* Op_assign */
+ i2->nexti = NULL;
+ bcfree(i1); /* Op_field_assign */
+ exp->lasti = i2; /* update Op_list */
+ return exp;
+ }
+ break;
+
+ case Op_push_array:
+ if (i2->nexti->nexti->opcode == Op_subscript_lhs) {
+ i3 = i2->nexti->nexti;
+ if (i3->sub_count == 1
+ && i3->nexti == i1
+ && i1->opcode == Op_assign
+ ) {
+ /* array[sub] = .. */
+ i3->opcode = Op_store_sub;
+ i3->memory = i2->memory;
+ i3->expr_count = 1; /* sub_count shadows memory,
+ * so use expr_count instead.
+ */
+ i3->nexti = NULL;
+ /* the array push stays in the list but is neutralised */
+ i2->opcode = Op_no_op;
+ bcfree(i1); /* Op_assign */
+ exp->lasti = i3; /* update Op_list */
+ return exp;
+ }
+ }
+ break;
+
+ case Op_push_lhs:
+ if (i2->nexti == i1
+ && i1->opcode == Op_assign
+ ) {
+ /* var = .. */
+ i2->opcode = Op_store_var;
+ i2->nexti = NULL;
+ bcfree(i1); /* Op_assign */
+ exp->lasti = i2; /* update Op_list */
+ return exp;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ /* no optimization */
+ return list_append(exp, instruction(Op_pop));
+}
+
+
+/* mk_getline --- make instructions for getline */
+
+/*
+ * op is the getline instruction, var the optional target's list, redir
+ * the optional redirection expression's list, and redirtype the kind of
+ * redirection (only recorded when redir is not NULL). Returns the
+ * combined list.
+ */
+
+static INSTRUCTION *
+mk_getline(INSTRUCTION *op, INSTRUCTION *var, INSTRUCTION *redir, OPCODE redirtype)
+{
+ INSTRUCTION *ip;
+ INSTRUCTION *tp;
+ INSTRUCTION *asgn = NULL;
+
+ /*
+ * getline [var] < [file]
+ *
+ * [ file (simp_exp)]
+ * [ [ var ] ]
+ * [ Op_K_getline_redir|NULL|redir_type|into_var]
+ * [ [var_assign] ]
+ *
+ */
+
+ if (redir == NULL) {
+ /* plain getline: replace op with a 2-slot Op_K_getline on the same line */
+ int sline = op->source_line;
+ bcfree(op);
+ op = bcalloc(Op_K_getline, 2, sline);
+ (op + 1)->target_endfile = ip_endfile;
+ (op + 1)->target_beginfile = ip_beginfile;
+ }
+
+ if (var != NULL) {
+ tp = make_assignable(var->lasti);
+ assert(tp != NULL);
+
+ /* check if we need after_assign bytecode */
+ if (tp->opcode == Op_push_lhs
+ && tp->memory->type == Node_var
+ && tp->memory->var_assign
+ ) {
+ asgn = instruction(Op_var_assign);
+ asgn->memory = tp->memory;
+ } else if (tp->opcode == Op_field_spec_lhs) {
+ asgn = instruction(Op_field_assign);
+ asgn->field_assign = (Func_ptr) 0; /* determined at run time */
+ tp->target_assign = asgn;
+ }
+ /* order: redirection expression, then target, then getline itself */
+ if (redir != NULL) {
+ ip = list_merge(redir, var);
+ (void) list_append(ip, op);
+ } else
+ ip = list_append(var, op);
+ } else if (redir != NULL)
+ ip = list_append(redir, op);
+ else
+ ip = list_create(op);
+ op->into_var = (var != NULL);
+ op->redir_type = (redir != NULL) ? redirtype : 0;
+
+ return (asgn == NULL ? ip : list_append(ip, asgn));
+}
+
+
+/* mk_for_loop --- for loop bytecodes */
+
+/*
+ * forp is the loop-head instruction (turned into Op_push_loop here);
+ * init, cond, incr and body are the corresponding lists and may each be
+ * NULL. Returns the complete loop list, laid out as in the diagram below.
+ */
+
+static INSTRUCTION *
+mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond,
+ INSTRUCTION *incr, INSTRUCTION *body)
+{
+ /*
+ * [ Op_push_loop | z| y] <-- continue | break
+ * ------------------------
+ * init (may be NULL)
+ * ------------------------
+ * x:
+ * cond (Op_no_op if NULL)
+ * ------------------------
+ * [ Op_jmp_false y ]
+ * ------------------------
+ * body (may be NULL)
+ * ------------------------
+ * z:
+ * incr (may be NULL)
+ * [ Op_jmp x ]
+ * ------------------------
+ * y:[ Op_pop_loop ]
+ */
+
+ INSTRUCTION *ip;
+ INSTRUCTION *cp;
+ INSTRUCTION *jmp;
+ INSTRUCTION *pp_cond;
+ INSTRUCTION *ret;
+
+ /* cp is the loop exit (label y) */
+ cp = instruction(Op_pop_loop);
+
+ forp->opcode = Op_push_loop;
+ forp->target_break = cp;
+ ip = list_create(forp);
+
+ if (init != NULL)
+ (void) list_merge(ip, init);
+
+ /* pp_cond marks the top of the loop (label x), the back-jump target */
+ if (cond != NULL) {
+ add_lint(cond, LINT_assign_in_cond);
+ pp_cond = cond->nexti;
+ (void) list_merge(ip, cond);
+ (void) list_append(ip, instruction(Op_jmp_false));
+ ip->lasti->target_jmp = cp;
+ } else {
+ pp_cond = instruction(Op_no_op);
+ (void) list_append(ip, pp_cond);
+ }
+
+ if (do_profiling) {
+ (void) list_append(ip, instruction(Op_exec_count));
+ (forp + 1)->opcode = Op_K_for;
+ (forp + 1)->forloop_cond = pp_cond;
+ (forp + 1)->forloop_body = ip->lasti;
+ }
+
+ if (body != NULL)
+ (void) list_merge(ip, body);
+
+ if (incr != NULL) {
+ forp->target_continue = incr->nexti;
+ (void) list_merge(ip, incr);
+ }
+ jmp = instruction(Op_jmp);
+ jmp->target_jmp = pp_cond;
+ /* without an increment part, continue goes straight to the back-jump */
+ if (incr == NULL)
+ forp->target_continue = jmp;
+ (void) list_append(ip, jmp);
+
+ ret = list_append(ip, cp);
+
+ fix_break_continue(forp, cp, TRUE);
+
+ return ret;
+}
+
+/* add_lint --- add lint warning bytecode if needed */
+
+/*
+ * list is the instruction list to inspect and linttype selects the check.
+ * When the check applies, an Op_lint instruction tagged with linttype is
+ * appended to list. Compiles to nothing when NO_LINT is defined.
+ */
+
+static void
+add_lint(INSTRUCTION *list, LINTTYPE linttype)
+{
+#ifndef NO_LINT
+ INSTRUCTION *ip;
+
+ switch (linttype) {
+ case LINT_assign_in_cond:
+ ip = list->lasti;
+ if (ip->opcode == Op_var_assign || ip->opcode == Op_field_assign) {
+ assert(ip != list->nexti);
+ /* step back to the instruction just before the trailing *_assign */
+ for (ip = list->nexti; ip->nexti != list->lasti; ip = ip->nexti)
+ ;
+ }
+
+ if (ip->opcode == Op_assign || ip->opcode == Op_assign_concat) {
+ list_append(list, instruction(Op_lint));
+ list->lasti->lint_type = linttype;
+ }
+ break;
+
+ case LINT_no_effect:
+ if (list->lasti->opcode == Op_pop && list->nexti != list->lasti) {
+ /* find the instruction just before the trailing Op_pop */
+ for (ip = list->nexti; ip->nexti != list->lasti; ip = ip->nexti)
+ ;
+
+ if (do_lint) { /* compile-time warning */
+ if (isnoeffect(ip->opcode))
+ lintwarn(_("statement may have no effect"));
+ }
+
+ if (ip->opcode == Op_push) { /* run-time warning */
+ list_append(list, instruction(Op_lint));
+ list->lasti->lint_type = linttype;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+#endif
+}
+
+/* mk_expression_list --- list of bytecode lists */
+
+/*
+ * Adds the expression list s1 to list and returns list; a new Op_list
+ * head is created when list is NULL. The individual expression lists
+ * are kept distinct (chained, not merged).
+ */
+
+static INSTRUCTION *
+mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1)
+{
+	/* we can't just combine all bytecodes, since we need to
+	 * process individual expressions for a few builtins in snode() (-:
+	 */
+
+	/* -- list of lists */
+	/* [Op_list| ... ]------
+	 *                     |
+	 * [Op_list| ... ] --  |
+	 *  ...              | |
+	 *  ...      <-------  |
+	 * [Op_list| ... ] --  |
+	 *  ...              | |
+	 *  ...              | |
+	 *  ...      <------- --
+	 */
+
+	assert(s1 != NULL && s1->opcode == Op_list);
+
+	if (list != NULL) {
+		/* hang s1's head off the last instruction seen so far */
+		list->lasti->nexti = s1;
+	} else {
+		/* first expression: make the outer list head */
+		list = instruction(Op_list);
+		list->nexti = s1;
+	}
+
+	list->lasti = s1->lasti;
+	return list;
+}
+
+/* count_expressions --- fixup expression_list from mk_expression_list.
+ * returns no of expressions in list. isarg is true
+ * for function arguments.
+ */
+
+/*
+ * On return *list points at a single list holding all the expressions
+ * merged together, and the outer list head has been released. A NULL
+ * *list yields 0 and is left untouched.
+ */
+
+static int
+count_expressions(INSTRUCTION **list, int isarg)
+{
+ INSTRUCTION *expr;
+ INSTRUCTION *r = NULL;
+ int count = 0;
+
+ if (*list == NULL) /* error earlier */
+ return 0;
+
+ for (expr = (*list)->nexti; expr; ) {
+ INSTRUCTION *t1, *t2;
+ t1 = expr->nexti;
+ t2 = expr->lasti;
+ /* an argument that is a lone Op_push becomes Op_push_param */
+ if (isarg && t1 == t2 && t1->opcode == Op_push)
+ t1->opcode = Op_push_param;
+ if (++count == 1)
+ r = expr;
+ else
+ (void) list_merge(r, expr);
+ /* the next expression's head hangs off this one's last instruction */
+ expr = t2->nexti;
+ }
+
+ assert(count > 0);
+ if (! isarg && count > max_args)
+ max_args = count;
+ bcfree(*list);
+ *list = r;
+ return count;
+}
+
+/* fix_break_continue --- fix up break & continue nodes in loop bodies */
+
+/*
+ * Walks the instructions from start (an Op_push_loop) up to, but not
+ * including, end (an Op_pop_loop). Every Op_K_break whose jump target is
+ * still unset is pointed at the loop's break target; Op_K_continue is
+ * handled the same way with the continue target, but only when
+ * check_continue is true.
+ */
+
+static void
+fix_break_continue(INSTRUCTION *start, INSTRUCTION *end, int check_continue)
+{
+	INSTRUCTION *cur;
+	INSTRUCTION *break_to, *continue_to;
+
+	assert(start->opcode == Op_push_loop);
+	assert(end->opcode == Op_pop_loop);
+
+	break_to = start->target_break;
+	continue_to = start->target_continue;
+
+	for (cur = start; cur != end; cur = cur->nexti) {
+		if (cur->opcode == Op_K_break) {
+			if (cur->target_jmp == NULL)
+				cur->target_jmp = break_to;
+		} else if (cur->opcode == Op_K_continue) {
+			if (check_continue && cur->target_jmp == NULL)
+				cur->target_jmp = continue_to;
+		}
+		/* every other opcode is left alone */
+	}
+}
+
+
+/* append_symbol --- append symbol to the list of symbols
+ * installed in the symbol table.
+ */
+
+/*
+ * A new node recording name is linked in right after the symbol_list
+ * head. The name pointer is stored as is; the string is not copied.
+ */
+
+void
+append_symbol(char *name)
+{
+ NODE *hp;
+
+ /* N.B.: func_install removes func name and reinstalls it;
+ * and we get two entries for it here!. destroy_symbol()
+ * will find and destroy the Node_func which is what we want.
+ */
+
+ getnode(hp);
+ hp->hname = name; /* shallow copy */
+ hp->hnext = symbol_list->hnext;
+ symbol_list->hnext = hp;
+}
+
+/* release_symbols --- free the list of symbol records hanging off symlist.
+ * Unless keep_globals is true, each recorded symbol is also
+ * destroyed via destroy_symbol().
+ */
+
+void
+release_symbols(NODE *symlist, int keep_globals)
+{
+	NODE *cur = symlist->hnext;
+
+	while (cur != NULL) {
+		NODE *following;
+
+		if (! keep_globals) {
+			/* destroys globals, function, and params
+			 * if still in symbol table and not removed by func_install
+			 * due to parse error.
+			 */
+			destroy_symbol(cur->hname);
+		}
+		following = cur->hnext;
+		freenode(cur);
+		cur = following;
+	}
+	symlist->hnext = NULL;
+}
+
+/* destroy_symbol --- remove a symbol from symbol table
+* and free all associated memory.
+*/
+
+/*
+ * Does nothing when name is not found by lookup(). What is released
+ * depends on the symbol's type: functions free their parameter-name
+ * array and parameter nodes, arrays are cleared, and plain variables
+ * drop their value.
+ */
+
+
+void
+destroy_symbol(char *name)
+{
+ NODE *symbol, *hp;
+
+ symbol = lookup(name);
+ if (symbol == NULL)
+ return;
+
+ if (symbol->type == Node_func) {
+ char **varnames;
+ NODE *func, *n;
+
+ func = symbol;
+ varnames = func->parmlist;
+ if (varnames != NULL)
+ efree(varnames);
+
+ /* function parameters of type Node_param_list */
+ for (n = func->lnode->rnode; n != NULL; ) {
+ NODE *np;
+ /* fetch the successor before this node is freed */
+ np = n->rnode;
+ efree(n->param);
+ freenode(n);
+ n = np;
+ }
+ freenode(func->lnode);
+ func_count--;
+
+ } else if (symbol->type == Node_var_array)
+ assoc_clear(symbol);
+ else if (symbol->type == Node_var)
+ unref(symbol->var_value);
+
+ /* remove from symbol table */
+ hp = remove_symbol(name);
+ efree(hp->hname);
+ freenode(hp->hvalue);
+ freenode(hp);
+}
+
+#define pool_size d.dl
+#define freei x.xi
+static INSTRUCTION *pool_list;
+static CONTEXT *curr_ctxt = NULL;
+
+
+/*
+ * new_context --- allocate and initialize a parsing context.
+ *
+ * The context is zero-filled, its srcfiles list is set up as an empty
+ * circular list, and its rule_list as an empty Op_list. The very first
+ * context created also becomes the current one and the file-level
+ * pointers (pool_list, symbol_list, srcfiles, rule_list, install_func)
+ * are pointed at it; later contexts only get a level one higher than
+ * the current context and are not made current here.
+ */
+CONTEXT *
+new_context()
+{
+ CONTEXT *ctxt;
+
+ emalloc(ctxt, CONTEXT *, sizeof(CONTEXT), "new_context");
+ memset(ctxt, 0, sizeof(CONTEXT));
+ /* empty circular list: the embedded head points at itself */
+ ctxt->srcfiles.next = ctxt->srcfiles.prev = &ctxt->srcfiles;
+ ctxt->rule_list.opcode = Op_list;
+ ctxt->rule_list.lasti = &ctxt->rule_list;
+ if (curr_ctxt == NULL) {
+ ctxt->level = 0;
+ pool_list = &ctxt->pools;
+ symbol_list = &ctxt->symbols;
+ srcfiles = &ctxt->srcfiles;
+ rule_list = &ctxt->rule_list;
+ install_func = ctxt->install_func;
+ curr_ctxt = ctxt;
+ } else
+ ctxt->level = curr_ctxt->level + 1; /* this assumes contexts don't overlap each other ? */
+
+ return ctxt;
+}
+
+
+/* N.B.: new context (level > 0) inherits all command line options;
+ * probably should restore defaults for lint etc.
+ */
+
+/*
+ * set_context --- make ctxt the current context and return it.
+ *
+ * Passing NULL switches back to the context recorded as the current
+ * one's predecessor. The source name and line number of the context
+ * being left are saved, and those of the context being entered are
+ * restored. Passing the current context only refreshes the file-level
+ * pointers.
+ */
+
+CONTEXT *
+set_context(CONTEXT *ctxt)
+{
+ assert(curr_ctxt != NULL);
+ if (curr_ctxt == ctxt)
+ goto update;
+
+ /* save current source and sourceline */
+ curr_ctxt->sourceline = sourceline;
+ curr_ctxt->source = source;
+
+ if (ctxt == NULL) {
+ assert(curr_ctxt->prev != NULL);
+ ctxt = curr_ctxt->prev;
+ } else
+ ctxt->prev = curr_ctxt;
+
+ /* restore source and sourceline */
+ sourceline = ctxt->sourceline;
+ source = ctxt->source;
+
+update:
+ pool_list = &ctxt->pools;
+ symbol_list = &ctxt->symbols;
+ srcfiles = &ctxt->srcfiles;
+ rule_list = &ctxt->rule_list;
+ install_func = ctxt->install_func;
+ curr_ctxt = ctxt;
+ return curr_ctxt;
+}
+
+/* get_context --- return the current context; one must already exist */
+CONTEXT *
+get_context()
+{
+ assert(curr_ctxt != NULL);
+ return curr_ctxt;
+}
+
+/*
+ * free_context --- release a context and everything it owns.
+ *
+ * ctxt may be NULL, in which case nothing happens; it must not be the
+ * current context. The instruction pools and the list of symbol records
+ * are freed; unless keep_globals is true the symbols themselves are
+ * destroyed too. Finally every source-file record on the context's
+ * srcfiles list and the context itself are freed.
+ */
+void
+free_context(CONTEXT *ctxt, int keep_globals)
+{
+	SRCFILE *s, *sn;
+
+	if (ctxt == NULL)
+		return;
+
+	assert(curr_ctxt != ctxt);
+
+	/* free all code including function codes */
+	free_bcpool(&ctxt->pools);
+	/* free symbols */
+	release_symbols(&ctxt->symbols, keep_globals);
+	/*
+	 * free srcfiles: the list is circular with ctxt->srcfiles as an
+	 * embedded head, so start at the first real entry and stop on
+	 * returning to the head. Starting at the head itself would make
+	 * the loop a no-op and leak every entry.
+	 */
+	for (s = ctxt->srcfiles.next; s != &ctxt->srcfiles; s = sn) {
+		sn = s->next;	/* fetch the successor before s is freed */
+		if (s->stype != SRC_CMDLINE && s->stype != SRC_STDIN)
+			efree(s->fullpath);
+		efree(s->src);
+		efree(s);
+	}
+	efree(ctxt);
+}
+
+/*
+ * free_bc_internal --- release memory owned by a single instruction.
+ *
+ * Only the data hanging off the instruction is freed (function name,
+ * regexp nodes, lost token text); the instruction slot itself is not.
+ */
+static void
+free_bc_internal(INSTRUCTION *cp)
+{
+ INSTRUCTION *curr;
+ NODE *m;
+
+ switch(cp->opcode) {
+ case Op_func_call:
+ /* builtin_func is compared by address and is never freed here */
+ if (cp->func_name != NULL
+ && cp->func_name != builtin_func
+ )
+ efree(cp->func_name);
+ break;
+ case Op_K_switch:
+ /* free the regexp node of every case whose value is not a Node_val */
+ for (curr = cp->case_val; curr != NULL; curr = curr->nexti) {
+ if (curr->opcode == Op_K_case &&
+ curr->memory->type != Node_val
+ ) {
+ m = curr->memory;
+ if (m->re_text != NULL)
+ unref(m->re_text);
+ if (m->re_reg != NULL)
+ refree(m->re_reg);
+ if (m->re_exp != NULL)
+ unref(m->re_exp);
+ freenode(m);
+ }
+ }
+ break;
+ case Op_push_re:
+ case Op_match_rec:
+ case Op_match:
+ case Op_nomatch:
+ m = cp->memory;
+ if (m->re_reg != NULL)
+ refree(m->re_reg);
+ if (m->re_exp != NULL)
+ unref(m->re_exp);
+ if (m->re_text != NULL)
+ unref(m->re_text);
+ freenode(m);
+ break;
+ case Op_token: /* token lost during error recovery in yyparse */
+ if (cp->lextok != NULL)
+ efree(cp->lextok);
+ break;
+ case Op_illegal:
+ cant_happen();
+ default:
+ break;
+ }
+}
+
+
+/* INSTR_CHUNK must be > largest code size (3) */
+#define INSTR_CHUNK 127
+
+/* bcfree --- deallocate instruction */
+
+/*
+ * The instruction is not handed back to the allocator: its opcode is
+ * reset to 0 and it is pushed onto the free list of the current pool.
+ */
+
+void
+bcfree(INSTRUCTION *cp)
+{
+ cp->opcode = 0;
+ cp->nexti = pool_list->freei;
+ pool_list->freei = cp;
+}
+
+
+/* bcalloc --- allocate a new instruction */
+
+/*
+ * Returns a zero-filled instruction of `size' consecutive slots with its
+ * opcode set to op and source_line set to srcline.
+ *
+ * Every block obtained from emalloc starts with one extra header slot
+ * that records the block's size in pool_size and links the block onto
+ * pool_list->nexti. Wide instructions (size > 1) get a block of their
+ * own; single instructions are taken from the pool's free list, which is
+ * refilled with a block of INSTR_CHUNK slots when empty.
+ */
+
+INSTRUCTION *
+bcalloc(OPCODE op, int size, int srcline)
+{
+ INSTRUCTION *cp;
+
+ if (size > 1) {
+ /* wide instructions Op_rule, Op_func_call .. */
+ emalloc(cp, INSTRUCTION *, (size + 1) * sizeof(INSTRUCTION), "bcalloc");
+ cp->pool_size = size;
+ cp->nexti = pool_list->nexti;
+ /* link the header, then step past it to the usable slots */
+ pool_list->nexti = cp++;
+ } else {
+ INSTRUCTION *pool;
+
+ pool = pool_list->freei;
+ if (pool == NULL) {
+ INSTRUCTION *last;
+ emalloc(cp, INSTRUCTION *, (INSTR_CHUNK + 1) * sizeof(INSTRUCTION), "bcalloc");
+
+ cp->pool_size = INSTR_CHUNK;
+ cp->nexti = pool_list->nexti;
+ pool_list->nexti = cp;
+ pool = ++cp;
+ last = &pool[INSTR_CHUNK - 1];
+ /* thread all slots of the new block into a free list */
+ for (; cp <= last; cp++) {
+ cp->opcode = 0;
+ cp->nexti = cp + 1;
+ }
+ /* the last slot terminates the free list */
+ --cp;
+ cp->nexti = NULL;
+ }
+ cp = pool;
+ pool_list->freei = cp->nexti;
+ }
+
+ memset(cp, 0, size * sizeof(INSTRUCTION));
+ cp->opcode = op;
+ cp->source_line = srcline;
+ return cp;
+}
+
+
+/*
+ * free_bcpool --- free every instruction block linked from pl.
+ *
+ * Each block begins with a header slot holding its size and the link to
+ * the next block. For a block of INSTR_CHUNK instructions all slots are
+ * visited; for any other block only the first slot after the header.
+ * Slots whose opcode is non-zero get their owned data released before
+ * the block itself is freed. pl is zeroed at the end.
+ */
+static void
+free_bcpool(INSTRUCTION *pl)
+{
+	INSTRUCTION *chunk = pl->nexti;
+
+	while (chunk != NULL) {
+		INSTRUCTION *following;
+		INSTRUCTION *slot;
+		INSTRUCTION *final;
+		long nslots = chunk->pool_size;
+
+		final = (nslots == INSTR_CHUNK) ? chunk + nslots : chunk + 1;
+
+		for (slot = chunk + 1; slot <= final; slot++) {
+			if (slot->opcode != 0)
+				free_bc_internal(slot);
+		}
+
+		following = chunk->nexti;
+		efree(chunk);
+		chunk = following;
+	}
+	memset(pl, 0, sizeof(INSTRUCTION));
+}
+
+
+/* list_create --- make a new Op_list whose only member is x */
+static inline INSTRUCTION *
+list_create(INSTRUCTION *x)
+{
+	INSTRUCTION *head = instruction(Op_list);
+
+	/* with a single member, first and last are the same instruction */
+	head->lasti = x;
+	head->nexti = x;
+	return head;
+}
+
+/* list_append --- add instruction x at the end of list l; returns l */
+static inline INSTRUCTION *
+list_append(INSTRUCTION *l, INSTRUCTION *x)
+{
+#ifdef GAWKDEBUG
+ if (l->opcode != Op_list)
+ cant_happen();
+#endif
+ l->lasti->nexti = x;
+ l->lasti = x;
+ return l;
+}
+
+/* list_prepend --- add instruction x at the front of list l; returns l */
+static inline INSTRUCTION *
+list_prepend(INSTRUCTION *l, INSTRUCTION *x)
+{
+#ifdef GAWKDEBUG
+ if (l->opcode != Op_list)
+ cant_happen();
+#endif
+ /* l->lasti is left as is; only the front of the list changes */
+ x->nexti = l->nexti;
+ l->nexti = x;
+ return l;
+}
+
+/*
+ * list_merge --- append the members of list l2 to list l1; returns l1.
+ * The Op_list head of l2 is released, so l2 must not be used afterwards.
+ */
+static inline INSTRUCTION *
+list_merge(INSTRUCTION *l1, INSTRUCTION *l2)
+{
+#ifdef GAWKDEBUG
+ if (l1->opcode != Op_list)
+ cant_happen();
+ if (l2->opcode != Op_list)
+ cant_happen();
+#endif
+ l1->lasti->nexti = l2->nexti;
+ l1->lasti = l2->lasti;
+ bcfree(l2);
+ return l1;
+}
+
/* See if name is a special token. */
int
@@ -3589,7 +5849,6 @@ check_special(const char *name)
}
#endif
-
low = 0;
high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1;
while (low <= high) {
@@ -3628,7 +5887,7 @@ read_one_line(int fd, void *buffer, size_t count)
fp = fdopen(fd, "r");
if (fp == NULL) {
fprintf(stderr, "ugh. fdopen: %s\n", strerror(errno));
- exit(EXIT_FAILURE);
+ gawk_exit(EXIT_FAILURE);
}
}
@@ -3654,161 +5913,3 @@ one_line_close(int fd)
return ret;
}
-/* constant_fold --- try to fold constant operations
- *
- * Returns node(left, op, right) unchanged whenever folding does not apply:
- * do_optimize is false, an operand is not a Node_val, or op is not one of
- * the handled operators.  Otherwise one of three folds is performed:
- *   - unary Node_not (right == NULL) on a constant;
- *   - Node_concat of two string constants, appended into left;
- *   - exp, times, quotient, mod, plus, minus on two non-string constants.
- * When a fold consumes an operand, that operand is unref'ed; the numeric
- * and concat folds reuse `left' to carry the result.
- * A zero divisor in a folded `/' or `%' is reported with fatal() from here.
- */
-
-static NODE *
-constant_fold(NODE *left, NODETYPE op, NODE *right)
-{
- AWKNUM result;
- extern double fmod P((double x, double y));
-
- if (! do_optimize)	/* no folding requested: build the ordinary node */
- return node(left, op, right);
-
- /* Unary not: a NULL right operand is treated as the one-operand case */
- if (right == NULL) {
- if (op == Node_not && left->type == Node_val) {
- if ((left->flags & (STRCUR|STRING)) != 0) {
- NODE *ret;
- if (left->stlen == 0) {	/* empty string constant: result is 1 */
- ret = make_number((AWKNUM) 1.0);
- } else {
- ret = make_number((AWKNUM) 0.0);
- }
- unref(left);	/* the string constant is replaced by ret */
-
- return ret;
- } else {
- if (left->numbr == 0) {	/* numeric constant: flip it in place */
- left->numbr = 1.0;
- } else {
- left->numbr = 0.0;
- }
-
- return left;
- }
- }
-
- return node(left, op, right);
- }
-
- /* String concatenation of two string constants */
- if (op == Node_concat
- && left->type == Node_val
- && (left->flags & (STRCUR|STRING)) != 0
- && right->type == Node_val
- && (right->flags & (STRCUR|STRING)) != 0) {
- size_t newlen = left->stlen + right->stlen + 2;	/* NOTE(review): the NUL written below needs only +1; the extra byte looks like slack */
-
- erealloc(left->stptr, char *, newlen, "constant_fold");
- memcpy(left->stptr + left->stlen, right->stptr, right->stlen);
- left->stptr[left->stlen + right->stlen] = '\0';
- left->stlen += right->stlen;
-
- unref(right);	/* its bytes now live in left */
- return left;
- }
-
- /*
- * From here down, numeric operations.
- * Check for string and bail out if have them.
- */
- if (left->type != Node_val
- || (left->flags & (STRCUR|STRING)) != 0
- || right->type != Node_val
- || (right->flags & (STRCUR|STRING)) != 0) {
- return node(left, op, right);
- }
-
- /* Numeric operations: */
- switch (op) {
- case Node_not:	/* NOTE(review): let through here, but the switch below has no Node_not case, so it would reach the "can't happen" default */
- case Node_exp:
- case Node_times:
- case Node_quotient:
- case Node_mod:
- case Node_plus:
- case Node_minus:
- break;
- default:
- return node(left, op, right);
- }
-
- left->numbr = force_number(left);
- right->numbr = force_number(right);
-
- result = left->numbr;	/* the in-place cases below update result using right->numbr */
- switch (op) {
- case Node_exp:
- result = calc_exp(left->numbr, right->numbr);
- break;
- case Node_times:
- result *= right->numbr;
- break;
- case Node_quotient:
- if (right->numbr == 0)
- fatal(_("division by zero attempted in `/'"));
- result /= right->numbr;
- break;
- case Node_mod:
- if (right->numbr == 0)
- fatal(_("division by zero attempted in `%%'"));
-#ifdef HAVE_FMOD
- result = fmod(result, right->numbr);
-#else /* ! HAVE_FMOD */
- (void) modf(left->numbr / right->numbr, &result);	/* result = integral part of the quotient */
- result = left->numbr - result * right->numbr;
-#endif /* ! HAVE_FMOD */
- break;
- case Node_plus:
- result += right->numbr;
- break;
- case Node_minus:
- result -= right->numbr;
- break;
- default:
- /* Shut up compiler warnings */
- fatal("can't happen");
- break;
- }
-
- unref(right);
- left->numbr = result;	/* left is reused to carry the folded value */
-
- return left;
-}
-
-/* optimize_concat --- optimize the general "x = x y z a" case
- *
- * For any op other than Node_assign, or when the shape below does not
- * match, this simply returns node(left, op, right).
- *
- * When `left' is a Node_var, Node_var_new or Node_param_list and `right'
- * is a Node_concat, the lnode chain of `right' is followed down to the
- * first lnode that is not itself a Node_concat.  If that node is the very
- * same NODE pointer as `left', it is replaced by Nnull_string and a
- * Node_assign_concat node is returned instead of a plain assignment.
- */
-
-static NODE *
-optimize_concat(NODE *left, NODETYPE op, NODE *right)
-{
- NODE *top, *leftmost;
-
- if (op != Node_assign)
- return node(left, op, right);
-
- /*
- * optimization of `x = x y'. can save lots of time
- * if done a lot.
- */
- if (( left->type == Node_var
- || left->type == Node_var_new
- || left->type == Node_param_list)
- && right->type == Node_concat) {
- /* find bottom of tree, save it */
- for (top = right; top->lnode != NULL && top->type == Node_concat; top = top->lnode) {
- leftmost = top->lnode;
- if (leftmost->type == Node_concat)
- continue;	/* keep descending through nested concatenations */
-
- /* at this point, we've run out of concatenation */
- if (leftmost != left)	/* compared by pointer identity */
- return node(left, op, right);
-
- top->lnode = Nnull_string;	/* drop the leading `x' from the right-hand side */
- return node(left, Node_assign_concat, right);
- }
- }
- return node(left, op, right);
-}