diff options
Diffstat (limited to 'awkgram.y')
-rw-r--r-- | awkgram.y | 5395 |
1 files changed, 3748 insertions, 1647 deletions
@@ -34,64 +34,103 @@ #define signed /**/ #endif -#define CAN_FREE TRUE -#define DONT_FREE FALSE - -#ifdef CAN_USE_STDARG_H static void yyerror(const char *m, ...) ATTRIBUTE_PRINTF_1; -#else -static void yyerror(); /* va_alist */ -#endif -static char *get_src_buf P((void)); -static int yylex P((void)); -static NODE *node_common P((NODETYPE op)); -static NODE *snode P((NODE *subn, NODETYPE op, int sindex)); -static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr)); -static NODE *append_right P((NODE *list, NODE *new)); -static inline NODE *append_pattern P((NODE **list, NODE *patt)); -static void func_install P((NODE *params, NODE *def)); -static void pop_var P((NODE *np, int freeit)); -static void pop_params P((NODE *params)); -static NODE *make_param P((char *name)); -static NODE *mk_rexp P((NODE *exp)); -static int dup_parms P((NODE *func)); -static void param_sanity P((NODE *arglist)); -static int parms_shadow P((const char *fname, NODE *func)); -static int isnoeffect P((NODETYPE t)); -static int isassignable P((NODE *n)); -static void dumpintlstr P((const char *str, size_t len)); -static void dumpintlstr2 P((const char *str1, size_t len1, const char *str2, size_t len2)); -static void count_args P((NODE *n)); -static int isarray P((NODE *n)); +static char *get_src_buf(void); +static int yylex(void); +int yyparse(void); +static INSTRUCTION *snode(INSTRUCTION *subn, INSTRUCTION *op); +static int func_install(INSTRUCTION *fp, INSTRUCTION *def); +static void pop_params(NODE *params); +static NODE *make_param(char *pname); +static NODE *mk_rexp(INSTRUCTION *exp); +static void append_param(char *pname); +static int dup_parms(NODE *func); +static void param_sanity(INSTRUCTION *arglist); +static int parms_shadow(INSTRUCTION *pc, int *shadow); +static int isnoeffect(OPCODE type); +static INSTRUCTION *make_assignable(INSTRUCTION *ip); +static void dumpintlstr(const char *str, size_t len); +static void dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2); +static int isarray(NODE *n); +static int include_source(char *src); +static void next_sourcefile(void); +static char *tokexpand(void); + +#define instruction(t) bcalloc(t, 1, 0) + +static INSTRUCTION *mk_program(void); +static INSTRUCTION *append_rule(INSTRUCTION *pattern, INSTRUCTION *action); +static INSTRUCTION *mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch, + INSTRUCTION *elsep, INSTRUCTION *false_branch); +static INSTRUCTION *mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1); +static INSTRUCTION *mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond, + INSTRUCTION *incr, INSTRUCTION *body); +static void fix_break_continue(INSTRUCTION *start, INSTRUCTION *end, int check_continue); +static INSTRUCTION *mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op); +static INSTRUCTION *mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op); +static INSTRUCTION *mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op); +static INSTRUCTION *mk_getline(INSTRUCTION *op, INSTRUCTION *opt_var, INSTRUCTION *redir, OPCODE redirtype); +static NODE *make_regnode(int type, NODE *exp); +static int count_expressions(INSTRUCTION **list, int isarg); +static INSTRUCTION *optimize_assignment(INSTRUCTION *exp); +static void add_lint(INSTRUCTION *list, LINTTYPE linttype); enum defref { FUNC_DEFINE, FUNC_USE }; -static void func_use P((const char *name, enum defref how)); -static void check_funcs P((void)); +static void func_use(const char *name, enum defref how); +static void check_funcs(void); +static void free_bcpool(INSTRUCTION *pl); -static ssize_t read_one_line P((int fd, void *buffer, size_t count)); -static int one_line_close P((int fd)); +static ssize_t read_one_line(int fd, void *buffer, size_t count); +static int one_line_close(int fd); -static NODE *constant_fold P((NODE *left, NODETYPE op, NODE *right)); -static NODE *optimize_concat P((NODE *left, NODETYPE op, NODE *right)); +static void (*install_func)(char *) = NULL; +static int want_source = FALSE; static int want_regexp; /* lexical scanning kludge */ static int can_return; /* parsing kludge */ -static int begin_or_end_rule = FALSE; /* parsing kludge */ -static int parsing_end_rule = FALSE; /* for warnings */ -static int beginfile_or_endfile_rule = FALSE; /* parsing kludge */ -static int parsing_endfile_rule = FALSE; /* for warnings */ +static int rule = 0; + +const char *const ruletab[] = { + "?", + "BEGIN", + "Rule", + "END", + "BEGINFILE", + "ENDFILE", +}; + static int in_print = FALSE; /* lexical scanning kludge for print */ static int in_parens = 0; /* lexical scanning kludge for print */ -static char *lexptr; /* pointer to next char during parsing */ +static int sub_counter = 0; /* array dimension counter for use in delete */ +static char *lexptr = NULL; /* pointer to next char during parsing */ static char *lexend; static char *lexptr_begin; /* keep track of where we were for error msgs */ static char *lexeme; /* beginning of lexeme for debugging */ +static int lexeof; /* seen EOF for current source? */ static char *thisline = NULL; +static int in_braces = 0; /* count braces for firstline, lastline in an 'action' */ +static int lastline = 0; +static int firstline = 0; +static SRCFILE *sourcefile = NULL; /* current program source */ +static int lasttok = 0; +static int eof_warned = FALSE; /* GLOBAL: want warning for each file */ +static int break_allowed; /* kludge for break */ +static int continue_allowed; /* kludge for continue */ + + +#define END_FILE -1000 +#define END_SRC -2000 + #define YYDEBUG_LEXER_TEXT (lexeme) static int param_counter; +static NODE *func_params; /* list of parameters for the current function */ static char *tokstart = NULL; static char *tok = NULL; static char *tokend; +static int errcount = 0; + +static NODE *symbol_list; +extern void destroy_symbol(char *name); static long func_count; /* total number of functions */ @@ -101,14 +140,25 @@ static int var_count; /* total number of global variables */ extern char *source; extern int sourceline; -extern struct src *srcfiles; -extern long numfiles; -extern int errcount; -extern NODE *begin_block; -extern NODE *end_block; -extern NODE *beginfile_block; -extern NODE *endfile_block; +extern SRCFILE *srcfiles; +extern INSTRUCTION *rule_list; +extern int max_args; + +static INSTRUCTION *rule_block[sizeof(ruletab)]; + +static INSTRUCTION *ip_rec; +static INSTRUCTION *ip_newfile; +static INSTRUCTION *ip_atexit = NULL; +static INSTRUCTION *ip_end; +static INSTRUCTION *ip_endfile; +static INSTRUCTION *ip_beginfile; +static inline INSTRUCTION *list_create(INSTRUCTION *x); +static inline INSTRUCTION *list_append(INSTRUCTION *l, INSTRUCTION *x); +static inline INSTRUCTION *list_prepend(INSTRUCTION *l, INSTRUCTION *x); +static inline INSTRUCTION *list_merge(INSTRUCTION *l1, INSTRUCTION *l2); + +extern double fmod(double x, double y); /* * This string cannot occur as a real awk identifier. * Use it as a special token to make function parsing @@ -119,48 +169,27 @@ extern NODE *endfile_block; * should only produce one error message, and not core dump. */ static char builtin_func[] = "@builtin"; + +#define YYSTYPE INSTRUCTION * %} -%union { - long lval; - AWKNUM fval; - NODE *nodeval; - NODETYPE nodetypeval; - char *sval; - NODE *(*ptrval) P((void)); -} - -%type <nodeval> function_prologue pattern action variable param_list -%type <nodeval> exp common_exp -%type <nodeval> simp_exp simp_exp_nc non_post_simp_exp -%type <nodeval> expression_list opt_expression_list print_expression_list -%type <nodeval> statements statement if_statement switch_body case_statements case_statement case_value opt_param_list -%type <nodeval> simple_stmt opt_simple_stmt -%type <nodeval> opt_exp opt_variable regexp -%type <nodeval> input_redir output_redir field_spec -%type <nodeval> function_call direct_function_call -%type <nodetypeval> print -%type <nodetypeval> assign_operator a_relop relop_or_less -%type <sval> func_name opt_incdec -%type <lval> lex_builtin - -%token <sval> FUNC_CALL NAME REGEXP -%token <lval> ERROR -%token <nodeval> YNUMBER YSTRING -%token <nodetypeval> RELOP IO_OUT IO_IN -%token <nodetypeval> ASSIGNOP ASSIGN MATCHOP CONCAT_OP -%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE -%token <nodetypeval> LEX_BEGINFILE LEX_ENDFILE -%token <nodetypeval> LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE -%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION -%token <nodetypeval> LEX_GETLINE LEX_NEXTFILE -%token <nodetypeval> LEX_IN -%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT -%token <lval> LEX_BUILTIN LEX_LENGTH +%token FUNC_CALL NAME REGEXP FILENAME +%token YNUMBER YSTRING +%token RELOP IO_OUT IO_IN +%token ASSIGNOP ASSIGN MATCHOP CONCAT_OP +%token SUBSCRIPT +%token LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE +%token LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE +%token LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION +%token LEX_BEGINFILE LEX_ENDFILE +%token LEX_GETLINE LEX_NEXTFILE +%token LEX_IN +%token LEX_AND LEX_OR INCREMENT DECREMENT +%token LEX_BUILTIN LEX_LENGTH +%token LEX_EOF +%token LEX_INCLUDE LEX_EVAL %token NEWLINE -/* these are just yylval numbers */ - /* Lowest to highest */ %right ASSIGNOP ASSIGN SLASH_BEFORE_EQUAL %right '?' ':' @@ -170,7 +199,7 @@ static char builtin_func[] = "@builtin"; %nonassoc LEX_IN %left FUNC_CALL LEX_BUILTIN LEX_LENGTH %nonassoc ',' -%nonassoc MATCHOP +%left MATCHOP %nonassoc RELOP '<' '>' IO_IN IO_OUT %left CONCAT_OP %left YSTRING YNUMBER @@ -183,81 +212,106 @@ static char builtin_func[] = "@builtin"; %left '(' ')' %% -start - : opt_nls program opt_nls - { - check_funcs(); - } - ; - program : /* empty */ | program rule { - beginfile_or_endfile_rule = begin_or_end_rule = parsing_end_rule = FALSE; + rule = 0; yyerrok; } + | program nls + | program LEX_EOF + { + next_sourcefile(); + } | program error - { - beginfile_or_endfile_rule = begin_or_end_rule = parsing_end_rule = FALSE; + { + rule = 0; /* * If errors, give up, don't produce an infinite * stream of syntax error messages. */ /* yyerrok; */ - } + } ; rule : pattern action { - $1->rnode = $2; + (void) append_rule($1, $2); } | pattern statement_term { - if ($1->lnode != NULL) { - /* pattern rule with non-empty pattern */ - $1->rnode = node(NULL, Node_K_print_rec, NULL); - } else { - /* an error */ - if (begin_or_end_rule) - msg(_("%s blocks must have an action part"), - (parsing_end_rule ? "END" : "BEGIN")); - else if (beginfile_or_endfile_rule) - msg(_("%s blocks must have an action part"), - (parsing_endfile_rule ? "ENDFILE" : "BEGINFILE")); - else - msg(_("each rule must have a pattern or an action part")); + if (rule != Rule) { + msg(_("%s blocks must have an action part"), ruletab[rule]); errcount++; - } + } else if ($1 == NULL) { + msg(_("each rule must have a pattern or an action part")); + errcount++; + } else /* pattern rule with non-empty pattern */ + (void) append_rule($1, NULL); } | function_prologue action { can_return = FALSE; - if ($1) - func_install($1, $2); + if ($1 && func_install($1, $2) < 0) + YYABORT; + func_params = NULL; + yyerrok; + } + | '@' LEX_INCLUDE source statement_term + { + want_source = FALSE; yyerrok; } ; -pattern - : /* empty */ +source + : FILENAME { - $$ = append_pattern(&expression_value, (NODE *) NULL); + char *src = $1->lextok; + if (include_source(src) < 0) + YYABORT; + efree(src); + bcfree($1); + $$ = NULL; } + | FILENAME error + { $$ = NULL; } + | error + { $$ = NULL; } + ; + +pattern + : /* empty */ + { $$ = NULL; rule = Rule; } | exp - { - $$ = append_pattern(&expression_value, $1); - } + { $$ = $1; rule = Rule; } | exp ',' exp { - NODE *r; - - getnode(r); - r->type = Node_line_range; - r->condpair = node($1, Node_cond_pair, $3); - r->triggered = FALSE; - $$ = append_pattern(&expression_value, r); + INSTRUCTION *tp; + + add_lint($1, LINT_assign_in_cond); + add_lint($3, LINT_assign_in_cond); + + tp = instruction(Op_no_op); + list_prepend($1, bcalloc(Op_line_range, !!do_profiling + 1, 0)); + $1->nexti->triggered = FALSE; + $1->nexti->target_jmp = $3->nexti; + + list_append($1, instruction(Op_cond_pair)); + $1->lasti->line_range = $1->nexti; + $1->lasti->target_jmp = tp; + + list_append($3, instruction(Op_cond_pair)); + $3->lasti->line_range = $1->nexti; + $3->lasti->target_jmp = tp; + if (do_profiling) { + ($1->nexti + 1)->condpair_left = $1->lasti; + ($1->nexti + 1)->condpair_right = $3->lasti; + } + $$ = list_append(list_merge($1, $3), tp); + rule = Rule; } | LEX_BEGIN { @@ -265,8 +319,9 @@ pattern if (do_lint_old && ++begin_seen == 2) warning(_("old awk does not support multiple `BEGIN' or `END' rules")); - begin_or_end_rule = TRUE; - $$ = append_pattern(&begin_block, (NODE *) NULL); + $1->in_rule = rule = BEGIN; + $1->source_file = source; + $$ = $1; } | LEX_END { @@ -274,39 +329,52 @@ pattern if (do_lint_old && ++end_seen == 2) warning(_("old awk does not support multiple `BEGIN' or `END' rules")); - begin_or_end_rule = parsing_end_rule = TRUE; - $$ = append_pattern(&end_block, (NODE *) NULL); + $1->in_rule = rule = END; + $1->source_file = source; + $$ = $1; } | LEX_BEGINFILE { - beginfile_or_endfile_rule = TRUE; - $$ = append_pattern(&beginfile_block, (NODE *) NULL); + $1->in_rule = rule = BEGINFILE; + $1->source_file = source; + $$ = $1; } | LEX_ENDFILE { - beginfile_or_endfile_rule = parsing_endfile_rule = TRUE; - $$ = append_pattern(&endfile_block, (NODE *) NULL); + $1->in_rule = rule = ENDFILE; + $1->source_file = source; + $$ = $1; } ; action : l_brace statements r_brace opt_semi opt_nls - { $$ = $2; } + { + if ($2 == NULL) + $$ = list_create(instruction(Op_no_op)); + else + $$ = $2; + } ; func_name : NAME - { $$ = $1; } + { $$ = $1; } | FUNC_CALL - { $$ = $1; } + { $$ = $1; } | lex_builtin { yyerror(_("`%s' is a built-in function, it cannot be redefined"), tokstart); - errcount++; - $$ = builtin_func; + $1->opcode = Op_symbol; /* Op_symbol instead of Op_token so that + * free_bc_internal does not try to free it + */ + $1->lextok = builtin_func; + $$ = $1; /* yyerrok; */ } + | '@' LEX_EVAL + { $$ = $2; } ; lex_builtin @@ -315,20 +383,26 @@ lex_builtin ; function_prologue - : LEX_FUNCTION - { - param_counter = 0; - } - func_name '(' opt_param_list r_paren opt_nls - { + : LEX_FUNCTION + { + param_counter = 0; + func_params = NULL; + } + func_name '(' opt_param_list r_paren opt_nls + { NODE *t; - t = make_param($3); + $1->source_file = source; + t = make_param($3->lextok); + $3->lextok = NULL; + bcfree($3); t->flags |= FUNC; - $$ = append_right(t, $5); + t->rnode = func_params; + func_params = t; + $$ = $1; can_return = TRUE; /* check for duplicate parameter names */ - if (dup_parms($$)) + if (dup_parms(t)) errcount++; } ; @@ -342,55 +416,56 @@ regexp { ++want_regexp; } REGEXP /* The terminating '/' is consumed by yylex(). */ { - NODE *n; - size_t len = strlen($3); + NODE *n, *exp; + char *re; + size_t len; + re = $3->lextok; + len = strlen(re); if (do_lint) { if (len == 0) lintwarn(_("regexp constant `//' looks like a C++ comment, but is not")); - else if (($3)[0] == '*' && ($3)[len-1] == '*') + else if ((re)[0] == '*' && (re)[len-1] == '*') /* possible C comment */ lintwarn(_("regexp constant `/%s/' looks like a C comment, but is not"), tokstart); } - getnode(n); - n->type = Node_regex; - n->re_exp = make_string($3, len); - n->re_reg = make_regexp($3, len, FALSE, TRUE); - n->re_text = NULL; - n->re_flags = CONSTANT; - n->re_cnt = 1; - $$ = n; + + exp = make_str_node(re, len, ALREADY_MALLOCED); + n = make_regnode(Node_regex, exp); + if (n == NULL) { + unref(exp); + YYABORT; + } + $$ = $3; + $$->opcode = Op_match_rec; + $$->memory = n; } ; a_slash : '/' + { bcfree($1); } | SLASH_BEFORE_EQUAL ; statements : /* empty */ - { $$ = NULL; } + { $$ = NULL; } | statements statement { if ($2 == NULL) $$ = $1; else { - if (do_lint && isnoeffect($2->type)) - lintwarn(_("statement may have no effect")); + add_lint($2, LINT_no_effect); if ($1 == NULL) $$ = $2; else - $$ = append_right( - ($1->type == Node_statement_list ? $1 - : node($1, Node_statement_list, (NODE *) NULL)), - ($2->type == Node_statement_list ? $2 - : node($2, Node_statement_list, (NODE *) NULL))); + $$ = list_merge($1, $2); } - yyerrok; + yyerrok; } | statements error - { $$ = NULL; } + { $$ = NULL; } ; statement_term @@ -400,21 +475,140 @@ statement_term statement : semi opt_nls - { $$ = NULL; } + { $$ = NULL; } | l_brace statements r_brace - { $$ = $2; } + { $$ = $2; } | if_statement - { $$ = $1; } + { + if (do_profiling) + $$ = list_prepend($1, instruction(Op_exec_count)); + else + $$ = $1; + } | LEX_SWITCH '(' exp r_paren opt_nls l_brace switch_body opt_nls r_brace - { $$ = node($3, Node_K_switch, $7); } + { + INSTRUCTION *ip; + + $1->opcode = Op_push_loop; + $1->target_continue = NULL; + ip = list_prepend($3, $1); + if ($7->nexti->switch_dflt == NULL) + $7->nexti->switch_dflt = $1; /* implicit break */ + if (do_profiling) { + (void) list_prepend(ip, instruction(Op_exec_count)); + ($1 + 1)->opcode = Op_K_switch; + ($1 + 1)->switch_body = $7->nexti; + } + (void) list_merge(ip, $7); + $$ = list_append(ip, instruction(Op_pop_loop)); + $1->target_break = $$->lasti; + + break_allowed--; + fix_break_continue($1, $$->lasti, FALSE); + } | LEX_WHILE '(' exp r_paren opt_nls statement - { $$ = node($3, Node_K_while, $6); } + { + /* + * [Op_push_loop| z| y] + * ----------------- + * z: + * cond + * ----------------- + * [Op_jmp_false y ] + * ----------------- + * body + * ----------------- + * [Op_jmp z ] + * y: [Op_pop_loop ] + */ + + INSTRUCTION *ip, *tp; + + tp = instruction(Op_pop_loop); + + add_lint($3, LINT_assign_in_cond); + $1->opcode = Op_push_loop; + $1->target_continue = $3->nexti; + $1->target_break = tp; + ip = list_create($1); + + (void) list_merge(ip, $3); + (void) list_append(ip, instruction(Op_jmp_false)); + ip->lasti->target_jmp = tp; + + if (do_profiling) { + (void) list_append(ip, instruction(Op_exec_count)); + ($1 + 1)->opcode = Op_K_while; + ($1 + 1)->while_body = ip->lasti; + } + + if ($6 != NULL) + (void) list_merge(ip, $6); + (void) list_append(ip, instruction(Op_jmp)); + ip->lasti->target_jmp = $1->target_continue; + $$ = list_append(ip, tp); + + break_allowed--; + continue_allowed--; + fix_break_continue($1, tp, TRUE); + } | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls - { $$ = node($6, Node_K_do, $3); } - | LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement { - /* - * Efficiency hack. Recognize the special case of + /* + * [Op_push_loop | x | y] + * ----------------- + * z: + * body + * ----------------- + * x: + * cond + * ----------------- + * [Op_jmp_true | z ] + * y: [Op_pop_loop ] + */ + + INSTRUCTION *ip; + + $4->opcode = Op_pop_loop; + $1->opcode = Op_push_loop; + $1->target_continue = $6->nexti; + $1->target_break = $4; + + add_lint($6, LINT_assign_in_cond); + if ($3 != NULL) + ip = list_merge($3, $6); + else + ip = list_prepend($6, instruction(Op_no_op)); + + if (do_profiling) { + (void) list_prepend(ip, instruction(Op_exec_count)); + ($1 + 1)->opcode = Op_K_do; + ($1 + 1)->doloop_cond = $1->target_continue; + } + + (void) list_append(ip, instruction(Op_jmp_true)); + ip->lasti->target_jmp = ip->nexti; + $$ = list_prepend(ip, $1); + (void) list_append(ip, $4); + + break_allowed--; + continue_allowed--; + fix_break_continue($1, $4, TRUE); + } + | LEX_FOR '(' NAME LEX_IN simple_variable r_paren opt_nls statement + { + + char *var_name = $3->lextok; + + if ($8 != NULL + && $8->lasti->opcode == Op_K_delete + && $8->lasti->expr_count == 1 + && $8->nexti->opcode == Op_push + && ($8->nexti->memory->type != Node_var || !($8->nexti->memory->var_update)) + && strcmp($8->nexti->memory->vname, var_name) == 0 + ) { + + /* Efficiency hack. Recognize the special case of * * for (iggy in foo) * delete foo[iggy] @@ -425,113 +619,192 @@ statement * * Check that the body is a `delete a[i]' statement, * and that both the loop var and array names match. - */ - if ($8 != NULL && $8->type == Node_K_delete && $8->rnode != NULL) { - NODE *arr, *sub; - - assert($8->rnode->type == Node_expression_list); - arr = $8->lnode; /* array var */ - sub = $8->rnode->lnode; /* index var */ - - if ( (arr->type == Node_var_new - || arr->type == Node_var_array - || arr->type == Node_param_list) - && (sub->type == Node_var_new - || sub->type == Node_var - || sub->type == Node_param_list) - && strcmp($3, sub->vname) == 0 - && strcmp($5, arr->vname) == 0) { - $8->type = Node_K_delete_loop; + */ + NODE *arr = NULL; + INSTRUCTION *ip = $8->nexti->nexti; + + if ($5->nexti->opcode == Op_push && $5->lasti == $5->nexti) + arr = $5->nexti->memory; + if (arr != NULL + && ip->opcode == Op_no_op + && ip->nexti->opcode == Op_push_array + && strcmp(ip->nexti->memory->vname, arr->vname) == 0 + && ip->nexti->nexti == $8->lasti + ) { + (void) make_assignable($8->nexti); + $8->lasti->opcode = Op_K_delete_loop; + $8->lasti->expr_count = 0; + bcfree($1); + efree(var_name); + bcfree($3); + bcfree($4); + bcfree($5); $$ = $8; - free($3); /* thanks to valgrind for pointing these out */ - free($5); } - else - goto regular_loop; } else { - regular_loop: - $$ = node($8, Node_K_arrayfor, - make_for_loop(variable($3, CAN_FREE, Node_var), - (NODE *) NULL, variable($5, CAN_FREE, Node_var_array))); - } + + /* [ Op_push_array a ] + * [ Op_arrayfor_init| w ] + * [ Op_push_loop | z | y ] + * z: [ Op_arrayfor_incr | y ] + * [ Op_var_assign if any ] + * + * body + * + * [Op_jmp | z ] + * y: [Op_pop_loop ] + * w: [Op_arrayfor_final ] + */ + INSTRUCTION *ip; + + ip = $5; + ip->nexti->opcode = Op_push_array; + $3->opcode = Op_arrayfor_init; + (void) list_append(ip, $3); + + $4->opcode = Op_arrayfor_incr; + $4->array_var = variable(var_name, Node_var); + $1->opcode = Op_push_loop; + $1->target_continue = $4; + + (void) list_append(ip, $1); + + /* add update_FOO instruction if necessary */ + if ($4->array_var->type == Node_var && $4->array_var->var_update) { + (void) list_append(ip, instruction(Op_var_update)); + ip->lasti->memory = $4->array_var; + } + (void) list_append(ip, $4); + + /* add set_FOO instruction if necessary */ + if ($4->array_var->type == Node_var && $4->array_var->var_assign) { + (void) list_append(ip, instruction(Op_var_assign)); + ip->lasti->memory = $4->array_var; + } + + if (do_profiling) { + (void) list_append(ip, instruction(Op_exec_count)); + ($1 + 1)->opcode = Op_K_arrayfor; + ($1 + 1)->forloop_cond = $4; + ($1 + 1)->forloop_body = ip->lasti; + } + + if ($8 != NULL) + (void) list_merge(ip, $8); + + (void) list_append(ip, instruction(Op_jmp)); + ip->lasti->target_jmp = $4; + (void) list_append(ip, instruction(Op_pop_loop)); + $4->target_jmp = $1->target_break = ip->lasti; + $$ = list_append(ip, instruction(Op_arrayfor_final)); + $3->target_jmp = $$->lasti; + + fix_break_continue($1, $4->target_jmp, TRUE); + } + + break_allowed--; + continue_allowed--; } | LEX_FOR '(' opt_simple_stmt semi opt_nls exp semi opt_nls opt_simple_stmt r_paren opt_nls statement { - $$ = node($12, Node_K_for, (NODE *) make_for_loop($3, $6, $9)); + $$ = mk_for_loop($1, $3, $6, $9, $12); + + break_allowed--; + continue_allowed--; } | LEX_FOR '(' opt_simple_stmt semi opt_nls semi opt_nls opt_simple_stmt r_paren opt_nls statement { - $$ = node($11, Node_K_for, - (NODE *) make_for_loop($3, (NODE *) NULL, $8)); + $$ = mk_for_loop($1, $3, (INSTRUCTION *) NULL, $8, $11); + + break_allowed--; + continue_allowed--; + } + | non_compound_stmt + { + if (do_profiling) + $$ = list_prepend($1, instruction(Op_exec_count)); + else + $$ = $1; + } + ; + +non_compound_stmt + : LEX_BREAK statement_term + { + if (! break_allowed) + yyerror(_("`break' is not allowed outside a loop or switch")); + + $1->target_jmp = NULL; + $$ = list_create($1); + } - | LEX_BREAK statement_term - /* for break, maybe we'll have to remember where to break to */ - { $$ = node((NODE *) NULL, Node_K_break, (NODE *) NULL); } | LEX_CONTINUE statement_term - /* similarly */ - { $$ = node((NODE *) NULL, Node_K_continue, (NODE *) NULL); } - | LEX_NEXT statement_term - { NODETYPE type; + { + if (! continue_allowed) + yyerror(_("`continue' is not allowed outside a loop")); - if (begin_or_end_rule) - yyerror(_("`%s' used in %s action"), "next", - (parsing_end_rule ? "END" : "BEGIN")); - else if (beginfile_or_endfile_rule) - yyerror(_("`%s' used in %s action"), "next", - (parsing_endfile_rule ? "ENDFILE" : "BEGINFILE")); - type = Node_K_next; - $$ = node((NODE *) NULL, type, (NODE *) NULL); - } + $1->target_jmp = NULL; + $$ = list_create($1); + + } + | LEX_NEXT statement_term + { + if (rule != Rule) + yyerror(_("`next' used in %s action"), ruletab[rule]); + $1->target_jmp = ip_rec; + $$ = list_create($1); + } | LEX_NEXTFILE statement_term - { - static short warned = FALSE; + { + static short warned = FALSE; - if (do_traditional) { - /* - * can't use yyerror, since may have overshot - * the source line - */ + if (do_traditional) { + /* + * can't use yyerror, since may have overshot + * the source line + */ errcount++; error(_("`nextfile' is a gawk extension")); - } - if (do_lint && ! warned) { - warned = TRUE; + } + if (do_lint && ! warned) { + warned = TRUE; lintwarn(_("`nextfile' is a gawk extension")); - } - if (begin_or_end_rule) { - /* same thing */ - errcount++; - error(_("`%s' used in %s action"), "nextfile", - (parsing_end_rule ? "END" : "BEGIN")); - } -#if 0 - else if (beginfile_or_endfile_rule) { - /* same thing */ - errcount++; - error(_("`%s' used in %s action"), "nextfile", - (parsing_endfile_rule ? "END" : "BEGIN")); - } -#endif - else if (parsing_endfile_rule) { - /* same thing */ + } + if (rule == BEGIN || rule == END || rule == ENDFILE) { errcount++; - error(_("`%s' used in %s action"), "nextfile", - (parsing_endfile_rule ? "ENDFILE" : "BEGINFILE")); - } - $$ = node((NODE *) NULL, Node_K_nextfile, (NODE *) NULL); + error(_("`nextfile' used in %s action"), ruletab[rule]); } + + $1->target_jmp = ip_newfile; + $1->target_endfile = ip_endfile; + $$ = list_create($1); + } | LEX_EXIT opt_exp statement_term - { $$ = node($2, Node_K_exit, (NODE *) NULL); } + { + if (rule == END) + $1->target_jmp = ip_atexit; + else + $1->target_jmp = ip_end; /* first instruction (no-op) in end block */ + + if ($2 == NULL) { + $$ = list_create($1); + (void) list_prepend($$, instruction(Op_push_i)); + $$->nexti->memory = Nnull_string; + } else + $$ = list_append($2, $1); + } | LEX_RETURN - { - if (! can_return) + { + if (! can_return) yyerror(_("`return' used outside function context")); - } - opt_exp statement_term - { - $$ = node($3 == NULL ? Nnull_string : $3, - Node_K_return, (NODE *) NULL); - } + } opt_exp statement_term { + if ($3 == NULL) { + $$ = list_create($1); + (void) list_prepend($$, instruction(Op_push_i)); + $$->nexti->memory = Nnull_string; + } else + $$ = list_append($3, $1); + } | simple_stmt statement_term ; @@ -548,136 +821,213 @@ simple_stmt { /* * Optimization: plain `print' has no expression list, so $3 is null. - * If $3 is an expression list with one element (rnode == null) - * and lnode is a field spec for field 0, we have `print $0'. - * For both, use Node_K_print_rec, which is faster for these two cases. + * If $3 is NULL or is a bytecode list for $0 use Op_K_print_rec, + * which is faster for these two cases. */ - if ($1 == Node_K_print && - ($3 == NULL - || ($3->type == Node_expression_list - && $3->rnode == NULL - && $3->lnode->type == Node_field_spec - && $3->lnode->lnode->type == Node_val - && $3->lnode->lnode->numbr == 0.0)) + + if ($1->opcode == Op_K_print && + ($3 == NULL + || ($3->lasti->opcode == Op_field_spec + && $3->nexti->nexti->nexti == $3->lasti + && $3->nexti->nexti->opcode == Op_push_i + && $3->nexti->nexti->memory->type == Node_val + && $3->nexti->nexti->memory->numbr == 0.0) + ) ) { static short warned = FALSE; + /* ----------------- + * output_redir + * [ redirect exp ] + * ----------------- + * expression_list + * ------------------ + * [Op_K_print_rec | NULL | redir_type | expr_count] + */ - $$ = node(NULL, Node_K_print_rec, $4); + if ($3 != NULL) { + bcfree($3->lasti); /* Op_field_spec */ + $3->nexti->nexti->memory->flags &= ~PERM; + $3->nexti->nexti->memory->flags |= MALLOC; + unref($3->nexti->nexti->memory); /* Node_val */ + bcfree($3->nexti->nexti); /* Op_push_i */ + bcfree($3->nexti); /* Op_list */ + bcfree($3); /* Op_list */ + } else { + if (do_lint && (rule == BEGIN || rule == END) && ! warned) { + warned = TRUE; + lintwarn( + _("plain `print' in BEGIN or END rule should probably be `print \"\"'")); + } + } - if (do_lint && $3 == NULL && begin_or_end_rule && ! warned) { - warned = TRUE; - lintwarn( - _("plain `print' in BEGIN or END rule should probably be `print \"\"'")); + $1->expr_count = 0; + $1->opcode = Op_K_print_rec; + if ($4 == NULL) { /* no redircetion */ + $1->redir_type = 0; + $$ = list_create($1); + } else { + INSTRUCTION *ip; + ip = $4->nexti; + $1->redir_type = ip->redir_type; + $4->nexti = ip->nexti; + bcfree(ip); + $$ = list_append($4, $1); } } else { - $$ = node($3, $1, $4); - if ($$->type == Node_K_printf) - count_args($$); + /* ----------------- + * [ output_redir ] + * [ redirect exp ] + * ----------------- + * [ expression_list ] + * ------------------ + * [$1 | NULL | redir_type | expr_count] + * + */ + + if ($4 == NULL) { /* no redirection */ + if ($3 == NULL) { /* printf without arg */ + $1->expr_count = 0; + $1->redir_type = 0; + $$ = list_create($1); + } else { + INSTRUCTION *t = $3; + $1->expr_count = count_expressions(&t, FALSE); + $1->redir_type = 0; + $$ = list_append(t, $1); + } + } else { + INSTRUCTION *ip; + ip = $4->nexti; + $1->redir_type = ip->redir_type; + $4->nexti = ip->nexti; + bcfree(ip); + if ($3 == NULL) { + $1->expr_count = 0; + $$ = list_append($4, $1); + } else { + INSTRUCTION *t = $3; + $1->expr_count = count_expressions(&t, FALSE); + $$ = list_append(list_merge($4, t), $1); + } + } } } - | LEX_DELETE NAME '[' expression_list ']' - { $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, $4); } - | LEX_DELETE NAME - { - static short warned = FALSE; - if (do_lint && ! warned) { - warned = TRUE; - lintwarn(_("`delete array' is a gawk extension")); - } - if (do_traditional) { - /* - * can't use yyerror, since may have overshot - * the source line - */ - errcount++; - error(_("`delete array' is a gawk extension")); - } - $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL); + | LEX_DELETE NAME { sub_counter = 0; } delete_subscript_list + { + char *arr = $2->lextok; + + $2->opcode = Op_push_array; + $2->memory = variable(arr, Node_var_array); + + if ($4 == NULL) { + static short warned = FALSE; + if (do_lint && ! warned) { + warned = TRUE; + lintwarn(_("`delete array' is a gawk extension")); + } + if (do_traditional) { + /* + * can't use yyerror, since may have overshot + * the source line + */ + errcount++; + error(_("`delete array' is a gawk extension")); + } + $1->expr_count = 0; + $$ = list_append(list_create($2), $1); + } else { + $1->expr_count = sub_counter; + $$ = list_append(list_append($4, $2), $1); } + } | LEX_DELETE '(' NAME ')' - { /* * this is for tawk compatibility. maybe the warnings * should always be done. */ - static short warned = FALSE; + { + static short warned = FALSE; + char *arr = $3->lextok; - if (do_lint && ! warned) { + if (do_lint && ! warned) { warned = TRUE; lintwarn(_("`delete(array)' is a non-portable tawk extension")); - } - if (do_traditional) { + } + if (do_traditional) { /* * can't use yyerror, since may have overshot - * the source line + * the source line. */ errcount++; error(_("`delete(array)' is a non-portable tawk extension")); - } - $$ = node(variable($3, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL); } + $3->memory = variable(arr, Node_var_array); + $3->opcode = Op_push_array; + $1->expr_count = 0; + $$ = list_append(list_create($3), $1); + } | exp - { $$ = $1; } + { $$ = optimize_assignment($1); } ; opt_simple_stmt : /* empty */ - { $$ = NULL; } + { $$ = NULL; } | simple_stmt - { $$ = $1; } + { $$ = $1; } ; switch_body : case_statements - { - if ($1 == NULL) { - $$ = NULL; - } else { - NODE *dflt = NULL; - NODE *head = $1; - NODE *curr; - - const char **case_values = NULL; - - int maxcount = 128; - int case_count = 0; - int i; - - emalloc(case_values, const char **, sizeof(char*) * maxcount, "switch_body"); - for (curr = $1; curr != NULL; curr = curr->rnode) { - /* Assure that case statement values are unique. */ - if (curr->lnode->type == Node_K_case) { - char *caseval; - - if (curr->lnode->lnode->type == Node_regex) - caseval = curr->lnode->lnode->re_exp->stptr; - else - caseval = force_string(tree_eval(curr->lnode->lnode))->stptr; - - for (i = 0; i < case_count; i++) - if (strcmp(caseval, case_values[i]) == 0) - yyerror(_("duplicate case values in switch body: %s"), caseval); - - if (case_count >= maxcount) { - maxcount += 128; - erealloc(case_values, const char **, sizeof(char*) * maxcount, "switch_body"); + { + INSTRUCTION *dflt = NULL; + + if ($1 != NULL) { + INSTRUCTION *curr; + const char **case_values = NULL; + int maxcount = 128; + int case_count = 0; + int i; + + emalloc(case_values, const char **, sizeof(char *) * maxcount, "statement"); + + for (curr = $1->case_val->nexti; curr != NULL; curr = curr->nexti) { + if (curr->opcode == Op_K_case) { + char *caseval; + if (curr->memory->type == Node_regex) + caseval = curr->memory->re_exp->stptr; + else + caseval = force_string(curr->memory)->stptr; + for (i = 0; i < case_count; i++) + if (strcmp(caseval, case_values[i]) == 0) + yyerror(_("duplicate case values in switch body: %s"), caseval); + + if (case_count >= maxcount) { + maxcount += 128; + erealloc(case_values, const char **, sizeof(char*) * maxcount, "statement"); + } + case_values[case_count++] = caseval; + } else { + /* Otherwise save a pointer to the default node. */ + if (dflt != NULL) + yyerror(_("duplicate `default' detected in switch body")); + dflt = curr; } - case_values[case_count++] = caseval; - } else { - /* Otherwise save a pointer to the default node. */ - if (dflt != NULL) - yyerror(_("Duplicate `default' detected in switch body")); - dflt = curr; } + + efree(case_values); + $$ = list_prepend($1->case_stmt, instruction(Op_K_switch)); + $$->nexti->case_val = $1->case_val->nexti; + $$->nexti->switch_dflt = dflt; + bcfree($1->case_val); /* Op_list */ + bcfree($1); /* Op_case_list */ + } else { + $$ = list_create(instruction(Op_K_switch)); + $$->nexti->case_val = NULL; + $$->nexti->switch_dflt = NULL; } - - free(case_values); - - /* Create the switch body. */ - $$ = node(head, Node_switch_body, dflt); } - } ; case_statements @@ -685,20 +1035,15 @@ case_statements { $$ = NULL; } | case_statements case_statement { - if ($2 == NULL) + if ($1 == NULL) { + $2->case_val = list_create($2->case_val); + $$ = $2; + } else { + (void) list_append($1->case_val, $2->case_val); + (void) list_merge($1->case_stmt, $2->case_stmt); + bcfree($2); /* Op_case_list */ $$ = $1; - else { - if (do_lint && isnoeffect($2->type)) - lintwarn(_("statement may have no effect")); - if ($1 == NULL) - $$ = node($2, Node_case_list, (NODE *) NULL); - else - $$ = append_right( - ($1->type == Node_case_list ? $1 : node($1, Node_case_list, (NODE *) NULL)), - ($2->type == Node_case_list ? $2 : node($2, Node_case_list, (NODE *) NULL)) - ); } - yyerrok; } | case_statements error { $$ = NULL; } @@ -706,30 +1051,77 @@ case_statements case_statement : LEX_CASE case_value colon opt_nls statements - { $$ = node($2, Node_K_case, $5); } + { + INSTRUCTION *casestmt = $5; + + $1->memory = $2->memory; + bcfree($2); + if ($5 == NULL) + casestmt = list_create(instruction(Op_no_op)); + if (do_profiling) + (void) list_prepend(casestmt, instruction(Op_exec_count)); + + $1->target_stmt = casestmt->nexti; + + /* recycle $3 as Op_case_list */ + $3->opcode = Op_case_list; + $3->case_val = $1; /* Op_K_case */ + $3->case_stmt = casestmt; /* Op_list */ + $$ = $3; + } | LEX_DEFAULT colon opt_nls statements - { $$ = node((NODE *) NULL, Node_K_default, $4); } + { + INSTRUCTION *casestmt = $4; + + if ($4 == NULL) + casestmt = list_create(instruction(Op_no_op)); + if (do_profiling) + (void) list_prepend(casestmt, instruction(Op_exec_count)); + + $1->target_stmt = casestmt->nexti; + $2->opcode = Op_case_list; + $2->case_val = $1; /* Op_K_default */ + $2->case_stmt = casestmt; /* Op_list */ + $$ = $2; + } ; case_value : YNUMBER - { $$ = $1; } - | '-' YNUMBER %prec UNARY { - $2->numbr = -(force_number($2)); + $1->opcode = Op_K_case; + $$ = $1; + } + | '-' YNUMBER %prec UNARY + { + $2->memory->numbr = -(force_number($2->memory)); + bcfree($1); + $2->opcode = Op_K_case; $$ = $2; } | '+' YNUMBER %prec UNARY - { $$ = $2; } - | YSTRING - { $$ = $1; } - | regexp - { $$ = $1; } + { + bcfree($1); + $2->opcode = Op_K_case; + $$ = $2; + } + | YSTRING + { + $1->opcode = Op_K_case; + $$ = $1; + } + | regexp + { + $1->opcode = Op_K_case; + $$ = $1; + } ; print : LEX_PRINT + { $$ = $1; } | LEX_PRINTF + { $$ = $1; } ; /* @@ -738,8 +1130,10 @@ print */ print_expression_list : opt_expression_list - | '(' exp comma expression_list r_paren - { $$ = node($2, Node_expression_list, $4); } + | '(' expression_list r_paren + { + $$ = $2; + } ; output_redir @@ -751,25 +1145,24 @@ output_redir } | IO_OUT { in_print = FALSE; in_parens = 0; } common_exp { - $$ = node($3, $1, (NODE *) NULL); - if ($1 == Node_redirect_twoway - && $3->type == Node_K_getline - && $3->rnode != NULL - && $3->rnode->type == Node_redirect_twoway) + if ($1->redir_type == redirect_twoway + && $3->lasti->opcode == Op_K_getline_redir + && $3->lasti->redir_type == redirect_twoway) yyerror(_("multistage two-way pipelines don't work")); + $$ = list_prepend($3, $1); } ; if_statement : LEX_IF '(' exp r_paren opt_nls statement { - $$ = node($3, Node_K_if, - node($6, Node_if_branches, (NODE *) NULL)); + $$ = mk_condition($3, $1, $6, NULL, NULL); } | LEX_IF '(' exp r_paren opt_nls statement LEX_ELSE opt_nls statement - { $$ = node($3, Node_K_if, - node($6, Node_if_branches, $9)); } + { + $$ = mk_condition($3, $1, $6, $7, $9); + } ; nls @@ -784,178 +1177,264 @@ opt_nls input_redir : /* empty */ - { $$ = NULL; } + { $$ = NULL; } | '<' simp_exp - { $$ = node($2, Node_redirect_input, (NODE *) NULL); } + { + bcfree($1); + $$ = $2; + } ; opt_param_list : /* empty */ - { $$ = NULL; } | param_list - { $$ = $1; } ; param_list : NAME - { $$ = make_param($1); } + { + append_param($1->lextok); + $1->lextok = NULL; + bcfree($1); + } | param_list comma NAME - { $$ = append_right($1, make_param($3)); yyerrok; } + { + append_param($3->lextok); + $3->lextok = NULL; + bcfree($3); + yyerrok; + } | error - { $$ = NULL; } + { /* func_params = NULL; */ } | param_list error - { $$ = NULL; } + { /* func_params = NULL; */ } | param_list comma error - { $$ = NULL; } + { /* func_params = NULL; */ } ; /* optional expression, as in for loop */ opt_exp : /* empty */ - { $$ = NULL; } + { $$ = NULL; } | exp - { $$ = $1; } + { $$ = $1; } ; opt_expression_list : /* empty */ - { $$ = NULL; } + { $$ = NULL; } | expression_list - { $$ = $1; } + { $$ = $1; } ; expression_list : exp - { $$ = node($1, Node_expression_list, (NODE *) NULL); } + { $$ = mk_expression_list(NULL, $1); } | expression_list comma exp - { - $$ = append_right($1, - node($3, Node_expression_list, (NODE *) NULL)); - yyerrok; - } + { + $$ = mk_expression_list($1, $3); + yyerrok; + } | error - { $$ = NULL; } + { $$ = NULL; } | expression_list error - { $$ = NULL; } + { $$ = NULL; } | expression_list error exp - { $$ = NULL; } + { $$ = NULL; } | expression_list comma error - { $$ = NULL; } + { $$ = NULL; } ; /* Expressions, not including the comma operator. */ -exp : variable assign_operator exp %prec ASSIGNOP - { - if (do_lint && $3->type == Node_regex) +exp + : variable assign_operator exp %prec ASSIGNOP + { + if (do_lint && $3->lasti->opcode == Op_match_rec) lintwarn(_("regular expression on right of assignment")); - $$ = optimize_concat($1, $2, $3); - } + $$ = mk_assignment($1, $3, $2); + } | exp LEX_AND exp - { $$ = node($1, Node_and, $3); } + { $$ = mk_boolean($1, $3, $2); } | exp LEX_OR exp - { $$ = node($1, Node_or, $3); } + { $$ = mk_boolean($1, $3, $2); } | exp MATCHOP exp - { - if ($1->type == Node_regex) + { + if ($1->lasti->opcode == Op_match_rec) warning(_("regular expression on left of `~' or `!~' operator")); - $$ = node($1, $2, mk_rexp($3)); - } - | exp LEX_IN NAME - { - if (do_lint_old) - warning(_("old awk does not support the keyword `in' except after `for'")); - $$ = node(variable($3, CAN_FREE, Node_var_array), Node_in_array, $1); + + if ($3->lasti == $3->nexti && $3->nexti->opcode == Op_match_rec) { + $2->memory = $3->nexti->memory; + bcfree($3->nexti); /* Op_match_rec */ + bcfree($3); /* Op_list */ + $$ = list_append($1, $2); + } else { + $2->memory = make_regnode(Node_dynregex, NULL); + $$ = list_append(list_merge($1, $3), $2); } + } + | exp LEX_IN simple_variable + { + if (do_lint_old) + warning(_("old awk does not support the keyword `in' except after `for'")); + $3->nexti->opcode = Op_push_array; + $2->opcode = Op_in_array; + $2->expr_count = 1; + $$ = list_append(list_merge($1, $3), $2); + } | exp a_relop exp %prec RELOP - { - if (do_lint && $3->type == Node_regex) + { + if (do_lint && $3->lasti->opcode == Op_match_rec) lintwarn(_("regular expression on right of comparison")); - $$ = node($1, $2, $3); - } + $$ = list_append(list_merge($1, $3), $2); + } | exp '?' exp ':' exp - { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));} + { $$ = mk_condition($1, $2, $3, $4, $5); } | common_exp - { $$ = $1; } + { $$ = $1; } ; assign_operator : ASSIGN - { $$ = $1; } + { $$ = $1; } | ASSIGNOP - { $$ = $1; } + { $$ = $1; } | SLASH_BEFORE_EQUAL ASSIGN /* `/=' */ - { $$ = Node_assign_quotient; } + { + $2->opcode = Op_assign_quotient; + $$ = $2; + } ; relop_or_less : RELOP - { $$ = $1; } + { $$ = $1; } | '<' - { $$ = Node_less; } + { $$ = $1; } ; + a_relop : relop_or_less + { $$ = $1; } | '>' - { $$ = Node_greater; } + { $$ = $1; } ; common_exp : simp_exp - { $$ = $1; } + { $$ = $1; } | simp_exp_nc - { $$ = $1; } + { $$ = $1; } | common_exp simp_exp %prec CONCAT_OP - { $$ = constant_fold($1, Node_concat, $2); } + { + int count = 2; + int is_simple_var = FALSE; + INSTRUCTION *ip1, *ip2; + + if ($1->lasti->opcode == Op_concat) { + /* multiple (> 2) adjacent strings optimization */ + is_simple_var = ($1->lasti->concat_flag & CSVAR); + count = $1->lasti->expr_count + 1; + $1->lasti->opcode = Op_no_op; + } else { + is_simple_var = ($1->nexti->opcode == Op_push + && $1->lasti == $1->nexti); /* first exp. is a simple + * variable?; kludge for use + * in Op_assign_concat. + */ + } + ip1 = $1->nexti; + ip2 = $2->nexti; + if (ip1->memory != NULL && ip1->memory->type == Node_val && ip1 == $1->lasti + && ip2->memory != NULL && ip2->memory->type == Node_val && ip2 == $2->lasti && do_optimize > 1){ + size_t nlen; + + ip1->memory = force_string(ip1->memory); + ip2->memory = force_string(ip2->memory); + nlen = ip1->memory->stlen + ip2->memory->stlen; + erealloc(ip1->memory->stptr, char *, nlen + 2, "constant fold"); + memcpy(ip1->memory->stptr + ip1->memory->stlen, ip2->memory->stptr, ip2->memory->stlen); + ip1->memory->stlen = nlen; + ip1->memory->stptr[nlen] = '\0'; + ip1->memory->flags &= ~(NUMCUR|NUMBER); + ip1->memory->flags |= (STRING|STRCUR); + bcfree($2); + bcfree(ip2); + $1->opcode = Op_push_i; + $$ = $1; + } else { + $$ = list_append(list_merge($1, $2), instruction(Op_concat)); + $$->lasti->concat_flag = (is_simple_var ? CSVAR : 0); + $$->lasti->expr_count = count; + if (count > max_args) + max_args = count; + } + } ; simp_exp : non_post_simp_exp /* Binary operators in order of decreasing precedence. */ | simp_exp '^' simp_exp - { $$ = constant_fold($1, Node_exp, $3); } + { $$ = mk_binary($1, $3, $2); } | simp_exp '*' simp_exp - { $$ = constant_fold($1, Node_times, $3); } + { $$ = mk_binary($1, $3, $2); } | simp_exp '/' simp_exp - { $$ = constant_fold($1, Node_quotient, $3); } + { $$ = mk_binary($1, $3, $2); } | simp_exp '%' simp_exp - { $$ = constant_fold($1, Node_mod, $3); } + { $$ = mk_binary($1, $3, $2); } | simp_exp '+' simp_exp - { $$ = constant_fold($1, Node_plus, $3); } + { $$ = mk_binary($1, $3, $2); } | simp_exp '-' simp_exp - { $$ = constant_fold($1, Node_minus, $3); } + { $$ = mk_binary($1, $3, $2); } | LEX_GETLINE opt_variable input_redir - { - /* - * In BEGINFILE/ENDFILE, allow `getline var < file' - */ - if (beginfile_or_endfile_rule) { - if ($2 != NULL && $3 != NULL) - ; /* all ok */ - else { - if ($2 != NULL) - fatal(_("`getline var' invalid inside %s rule"), - parsing_endfile_rule ? "ENDFILE" : "BEGINFILE"); - else - fatal(_("`getline' invalid inside %s rule"), - parsing_endfile_rule ? "ENDFILE" : "BEGINFILE"); - } - } - if (do_lint && parsing_end_rule && $3 == NULL) - lintwarn(_("non-redirected `getline' undefined inside END action")); - $$ = node($2, Node_K_getline, $3); + { + /* + * In BEGINFILE/ENDFILE, allow `getline var < file' + */ + if (rule == BEGINFILE || rule == ENDFILE) { + if ($2 != NULL && $3 != NULL) + ; /* all ok */ + else { + if ($2 != NULL) + yyerror(_("`getline var' invalid inside `%s' rule"), ruletab[rule]); + else + yyerror(_("`getline' invalid inside `%s' rule"), ruletab[rule]); + YYABORT; + } } + + if (do_lint && rule == END && $3 == NULL) + lintwarn(_("non-redirected `getline' undefined inside END action")); + $$ = mk_getline($1, $2, $3, redirect_input); + } | variable INCREMENT - { $$ = node($1, Node_postincrement, (NODE *) NULL); } + { + $2->opcode = Op_postincrement; + $$ = mk_assignment($1, NULL, $2); + } | variable DECREMENT - { $$ = node($1, Node_postdecrement, (NODE *) NULL); } - | '(' expression_list r_paren LEX_IN NAME - { - if (do_lint_old) { + { + $2->opcode = Op_postdecrement; + $$ = mk_assignment($1, NULL, $2); + } + | '(' expression_list r_paren LEX_IN simple_variable + { + if (do_lint_old) { warning(_("old awk does not support the keyword `in' except after `for'")); warning(_("old awk does not support multidimensional arrays")); - } - $$ = node(variable($5, CAN_FREE, Node_var_array), Node_in_array, $2); } + $5->nexti->opcode = Op_push_array; + $4->opcode = Op_in_array; + if ($2 == NULL) { /* error */ + errcount++; + $4->expr_count = 0; + $$ = list_merge($5, $4); + } else { + INSTRUCTION *t = $2; + $4->expr_count = count_expressions(&t, FALSE); + $$ = list_append(list_merge(t, $5), $4); + } + } ; /* Expressions containing "| getline" lose the ability to be on the @@ -963,161 +1442,320 @@ simp_exp simp_exp_nc : common_exp IO_IN LEX_GETLINE opt_variable { - $$ = node($4, Node_K_getline, - node($1, $2, (NODE *) NULL)); + $$ = mk_getline($3, $4, $1, $2->redir_type); + bcfree($2); } + /* Binary operators in order of decreasing precedence. */ | simp_exp_nc '^' simp_exp - { $$ = node($1, Node_exp, $3); } + { $$ = mk_binary($1, $3, $2); } | simp_exp_nc '*' simp_exp - { $$ = node($1, Node_times, $3); } + { $$ = mk_binary($1, $3, $2); } | simp_exp_nc '/' simp_exp - { $$ = node($1, Node_quotient, $3); } + { $$ = mk_binary($1, $3, $2); } | simp_exp_nc '%' simp_exp - { $$ = node($1, Node_mod, $3); } + { $$ = mk_binary($1, $3, $2); } | simp_exp_nc '+' simp_exp - { $$ = node($1, Node_plus, $3); } + { $$ = mk_binary($1, $3, $2); } | simp_exp_nc '-' simp_exp - { $$ = node($1, Node_minus, $3); } + { $$ = mk_binary($1, $3, $2); } ; non_post_simp_exp : regexp - { $$ = $1; } + { + $$ = list_create($1); + } | '!' simp_exp %prec UNARY - { $$ = constant_fold($2, Node_not, (NODE *) NULL); } + { + if ($2->opcode == Op_match_rec) { + $2->opcode = Op_nomatch; + $1->opcode = Op_push_i; + $1->memory = mk_number(0.0, (PERM|NUMCUR|NUMBER)); + $$ = list_append(list_append(list_create($1), + instruction(Op_field_spec)), $2); + } else { + INSTRUCTION *ip; + ip = $2->nexti; + if (ip->memory->type == Node_val && $2->lasti == ip && do_optimize > 1) { + NODE *ret; + if ((ip->memory->flags & (STRCUR|STRING)) != 0) { + if (ip->memory->stlen == 0) { + ret = make_number((AWKNUM) 1.0); + } else { + ret = make_number((AWKNUM) 0.0); + } + } else { + if (ip->memory->numbr == 0) { + ret = make_number((AWKNUM) 1.0); + } else { + ret = make_number((AWKNUM) 0.0); + } + } + ret->flags &= ~MALLOC; + ret->flags |= PERM; + $1->memory = ret; + $1->opcode = Op_push_i; + bcfree(ip); + bcfree($2); + $$ = list_create($1); + } else { + $1->opcode = Op_not; + add_lint($2, LINT_assign_in_cond); + $$ = list_append($2, $1); + } + } + } | '(' exp r_paren - { $$ = $2; } - | LEX_BUILTIN - '(' opt_expression_list r_paren - { $$ = snode($3, Node_builtin, (int) $1); } + { $$ = $2; } + | LEX_BUILTIN '(' opt_expression_list r_paren + { + $$ = snode($3, $1); + if ($$ == NULL) + YYABORT; + } | LEX_LENGTH '(' opt_expression_list r_paren - { $$ = snode($3, Node_builtin, (int) $1); } + { + $$ = snode($3, $1); + if ($$ == NULL) + YYABORT; + } | LEX_LENGTH { - static short warned1 = FALSE, warned2 = FALSE; + static short warned1 = FALSE; if (do_lint && ! warned1) { warned1 = TRUE; lintwarn(_("call of `length' without parentheses is not portable")); } - $$ = snode((NODE *) NULL, Node_builtin, (int) $1); - if (do_posix && ! warned2) { - warned2 = TRUE; - warning(_("call of `length' without parentheses is deprecated by POSIX")); - } + $$ = snode(NULL, $1); + if ($$ == NULL) + YYABORT; } - | function_call + | func_call | variable | INCREMENT variable - { $$ = node($2, Node_preincrement, (NODE *) NULL); } + { + $1->opcode = Op_preincrement; + $$ = mk_assignment($2, NULL, $1); + } | DECREMENT variable - { $$ = node($2, Node_predecrement, (NODE *) NULL); } + { + $1->opcode = Op_predecrement; + $$ = mk_assignment($2, NULL, $1); + } | YNUMBER - { $$ = $1; } + { + $$ = list_create($1); + } | YSTRING - { $$ = $1; } - + { + $$ = list_create($1); + } | '-' simp_exp %prec UNARY - { - if ($2->type == Node_val && ($2->flags & (STRCUR|STRING)) == 0) { - $2->numbr = -(force_number($2)); + { + if ($2->lasti->opcode == Op_push_i + && ($2->lasti->memory->flags & (STRCUR|STRING)) == 0) { + $2->lasti->memory->numbr = -(force_number($2->lasti->memory)); $$ = $2; - } else - $$ = node($2, Node_unary_minus, (NODE *) NULL); + bcfree($1); + } else { + $1->opcode = Op_unary_minus; + $$ = list_append($2, $1); } + } | '+' simp_exp %prec UNARY - { - /* - * was: $$ = $2 - * POSIX semantics: force a conversion to numeric type - */ - $$ = node (make_number(0.0), Node_plus, $2); - } + { + /* + * was: $$ = $2 + * POSIX semantics: force a conversion to numeric type + */ + $1->opcode = Op_plus_i; + $1->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); + $$ = list_append($2, $1); + } ; -function_call - : direct_function_call +func_call + : direct_func_call { - func_use($1->rnode->stptr, FUNC_USE); + func_use($1->lasti->func_name, FUNC_USE); $$ = $1; } - | '@' direct_function_call + | '@' direct_func_call { /* indirect function call */ + INSTRUCTION *f, *t; + char *name; + NODE *indirect_var; static short warned = FALSE; + const char *msg = _("indirect function calls are a gawk extension"); - if (do_lint && ! warned) { + if (do_traditional || do_posix) + yyerror("%s", msg); + else if (do_lint && ! warned) { warned = TRUE; - lintwarn(_("indirect function calls are a gawk extension")); + lintwarn("%s", msg); } + + f = $2->lasti; + f->opcode = Op_indirect_func_call; + name = estrdup(f->func_name, strlen(f->func_name)); + indirect_var = variable(name, Node_var_new); + if (is_std_var(name)) + yyerror(_("can not use special variable `%s' for indirect function call"), name); + t = instruction(Op_push); + t->memory = indirect_var; + + /* prepend indirect var instead of appending to arguments (opt_expression_list), + * and pop it off in setup_frame (eval.c) (left to right evaluation order); Test case: + * f = "fun" + * @f(f="real_fun") + */ - $$ = $2; - $$->type = Node_indirect_func_call; + $$ = list_prepend($2, t); } ; -direct_function_call +direct_func_call : FUNC_CALL '(' opt_expression_list r_paren { - $$ = node($3, Node_func_call, make_string($1, strlen($1))); - $$->funcbody = NULL; param_sanity($3); - free($1); + $1->opcode = Op_func_call; + $1->func_body = NULL; + if ($3 == NULL) { /* no argument or error */ + ($1 + 1)->expr_count = 0; + $$ = list_create($1); + } else { + INSTRUCTION *t = $3; + ($1 + 1)->expr_count = count_expressions(&t, TRUE); + $$ = list_append(t, $1); + } } ; opt_variable : /* empty */ - { $$ = NULL; } + { $$ = NULL; } | variable - { $$ = $1; } + { $$ = $1; } ; -variable +delete_subscript_list + : /* empty */ + { $$ = NULL; } + | delete_subscript SUBSCRIPT + { $$ = $1; } + ; + +delete_subscript + : delete_exp_list + { $$ = $1; } + | delete_subscript delete_exp_list + { + $$ = list_merge($1, $2); + } + ; + +delete_exp_list + : bracketed_exp_list + { + INSTRUCTION *ip = $1->lasti; + int count = ip->sub_count; /* # of SUBSEP-seperated expressions */ + if (count > 1) { + /* change Op_subscript or Op_sub_array to Op_concat */ + ip->opcode = Op_concat; + ip->concat_flag = CSUBSEP; + ip->expr_count = count; + } else + ip->opcode = Op_no_op; + sub_counter++; /* count # of dimensions */ + $$ = $1; + } + ; + +bracketed_exp_list + : '[' expression_list ']' + { + INSTRUCTION *t = $2; + if ($2 == NULL) { + errcount++; + error(_("invalid subscript expression")); + /* install Null string as subscript. */ + t = list_create(instruction(Op_push_i)); + t->nexti->memory = Nnull_string; + $3->sub_count = 1; + } else + $3->sub_count = count_expressions(&t, FALSE); + $$ = list_append(t, $3); + } + ; + +subscript + : bracketed_exp_list + { $$ = $1; } + | subscript bracketed_exp_list + { + $$ = list_merge($1, $2); + } + ; + +subscript_list + : subscript SUBSCRIPT + { $$ = $1; } + ; + +simple_variable : NAME - { $$ = variable($1, CAN_FREE, Node_var_new); } - | NAME '[' expression_list ']' + { + char *var_name = $1->lextok; + + $1->opcode = Op_push; + $1->memory = variable(var_name, Node_var_new); + $$ = list_create($1); + } + | NAME subscript_list { NODE *n; - if ((n = lookup($1)) != NULL && ! isarray(n)) { + char *arr = $1->lextok; + if ((n = lookup(arr)) != NULL && ! isarray(n)) yyerror(_("use of non-array as array")); - $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3); - } else if ($3 == NULL) { - fatal(_("invalid subscript expression")); - } else if ($3->rnode == NULL) { - $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3->lnode); - freenode($3); - } else - $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3); + $1->memory = variable(arr, Node_var_array); + $1->opcode = Op_push_array; + $$ = list_prepend($2, $1); } - | field_spec { $$ = $1; } -/* -#if 0 - | lex_builtin - { fatal(_("can't use built-in function `%s' as a variable"), tokstart); } -#endif -*/ ; -field_spec - : '$' non_post_simp_exp opt_incdec +variable + : simple_variable { - NODE *n = node($2, Node_field_spec, (NODE *) NULL); - if ($3 != NULL) { - if ($3[0] == '+') - $$ = node(n, Node_postincrement, (NODE *) NULL); - else - $$ = node(n, Node_postdecrement, (NODE *) NULL); - } else { - $$ = n; - } + INSTRUCTION *ip = $1->nexti; + if (ip->opcode == Op_push + && ip->memory->type == Node_var + && ip->memory->var_update + ) { + $$ = list_prepend($1, instruction(Op_var_update)); + $$->nexti->memory = ip->memory; + } else + $$ = $1; + } + | '$' non_post_simp_exp opt_incdec + { + $$ = list_append($2, $1); + if ($3 != NULL) + mk_assignment($2, NULL, $3); } ; opt_incdec - : INCREMENT { $$ = "+"; } - | DECREMENT { $$ = "-"; } + : INCREMENT + { + $1->opcode = Op_postincrement; + } + | DECREMENT + { + $1->opcode = Op_postdecrement; + } | /* empty */ { $$ = NULL; } ; @@ -1143,19 +1781,19 @@ semi ; colon - : ':' { yyerrok; } + : ':' { $$ = $1; yyerrok; } ; -comma : ',' opt_nls { yyerrok; } +comma + : ',' opt_nls { yyerrok; } ; - %% struct token { - const char *operator; /* text to match */ - NODETYPE value; /* node type */ - int class; /* lexical class */ - unsigned flags; /* # of args. allowed and compatability */ + const char *operator; /* text to match */ + OPCODE value; /* type */ + int class; /* lexical class */ + unsigned flags; /* # of args. allowed and compatability */ # define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination */ # define A(n) (1<<(n)) # define VERSION_MASK 0xFF00 /* old awk is zero */ @@ -1163,7 +1801,9 @@ struct token { # define NOT_POSIX 0x0200 /* feature not in POSIX */ # define GAWKX 0x0400 /* gawk extension */ # define RESX 0x0800 /* Bell Labs Research extension */ - NODE *(*ptr) P((NODE *)); /* function that implements this keyword */ +# define BREAK 0x1000 /* break allowed inside */ +# define CONTINUE 0x2000 /* continue allowed inside */ + NODE *(*ptr)(int); /* function that implements this keyword */ }; #if 'a' == 0x81 /* it's EBCDIC */ @@ -1188,77 +1828,79 @@ tokcompare(void *l, void *r) */ static const struct token tokentab[] = { -{"BEGIN", Node_illegal, LEX_BEGIN, 0, 0}, -{"BEGINFILE", Node_illegal, LEX_BEGINFILE, GAWKX, 0}, -{"END", Node_illegal, LEX_END, 0, 0}, -{"ENDFILE", Node_illegal, LEX_ENDFILE, GAWKX, 0}, +{"BEGIN", Op_rule, LEX_BEGIN, 0, 0}, +{"BEGINFILE", Op_rule, LEX_BEGINFILE, GAWKX, 0}, +{"END", Op_rule, LEX_END, 0, 0}, +{"ENDFILE", Op_rule, LEX_ENDFILE, GAWKX, 0}, #ifdef ARRAYDEBUG -{"adump", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_adump}, +{"adump", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_adump}, #endif -{"and", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_and}, -{"asort", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asort}, -{"asorti", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asorti}, -{"atan2", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2}, -{"bindtextdomain", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_bindtextdomain}, -{"break", Node_K_break, LEX_BREAK, 0, 0}, -{"case", Node_K_case, LEX_CASE, GAWKX, 0}, -{"close", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1)|A(2), do_close}, -{"compl", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_compl}, -{"continue", Node_K_continue, LEX_CONTINUE, 0, 0}, -{"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos}, -{"dcgettext", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_dcgettext}, -{"dcngettext", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext}, -{"default", Node_K_default, LEX_DEFAULT, GAWKX, 0}, -{"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0}, -{"do", Node_K_do, LEX_DO, NOT_OLD, 0}, -{"else", Node_illegal, LEX_ELSE, 0, 0}, -{"exit", Node_K_exit, LEX_EXIT, 0, 0}, -{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp}, -{"extension", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_ext}, -{"fflush", Node_builtin, LEX_BUILTIN, RESX|A(0)|A(1), do_fflush}, -{"for", Node_K_for, LEX_FOR, 0, 0}, -{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0}, -{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0}, -{"gensub", Node_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub}, -{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0}, -{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub}, -{"if", Node_K_if, LEX_IF, 0, 0}, -{"in", Node_illegal, LEX_IN, 0, 0}, -{"index", Node_builtin, LEX_BUILTIN, A(2), do_index}, -{"int", Node_builtin, LEX_BUILTIN, A(1), do_int}, -{"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length}, -{"log", Node_builtin, LEX_BUILTIN, A(1), do_log}, -{"lshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_lshift}, -{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_match}, -{"mktime", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_mktime}, -{"next", Node_K_next, LEX_NEXT, 0, 0}, -{"nextfile", Node_K_nextfile, LEX_NEXTFILE, GAWKX, 0}, -{"or", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_or}, -{"patsplit", Node_builtin, LEX_BUILTIN, GAWKX|A(2)|A(3)|A(4), do_patsplit}, -{"print", Node_K_print, LEX_PRINT, 0, 0}, -{"printf", Node_K_printf, LEX_PRINTF, 0, 0}, -{"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand}, -{"return", Node_K_return, LEX_RETURN, NOT_OLD, 0}, -{"rshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_rshift}, -{"sin", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin}, -{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3)|A(4), do_split}, -{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf}, -{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt}, -{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand}, +{"and", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_and}, +{"asort", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asort}, +{"asorti", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asorti}, +{"atan2", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2}, +{"bindtextdomain", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_bindtextdomain}, +{"break", Op_K_break, LEX_BREAK, 0, 0}, +{"case", Op_K_case, LEX_CASE, GAWKX, 0}, +{"close", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1)|A(2), do_close}, +{"compl", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_compl}, +{"continue", Op_K_continue, LEX_CONTINUE, 0, 0}, +{"cos", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos}, +{"dcgettext", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_dcgettext}, +{"dcngettext", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext}, +{"default", Op_K_default, LEX_DEFAULT, GAWKX, 0}, +{"delete", Op_K_delete, LEX_DELETE, NOT_OLD, 0}, +{"do", Op_symbol, LEX_DO, NOT_OLD|BREAK|CONTINUE, 0}, +{"else", Op_K_else, LEX_ELSE, 0, 0}, +{"eval", Op_symbol, LEX_EVAL, 0, 0}, +{"exit", Op_K_exit, LEX_EXIT, 0, 0}, +{"exp", Op_builtin, LEX_BUILTIN, A(1), do_exp}, +{"extension", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_ext}, +{"fflush", Op_builtin, LEX_BUILTIN, RESX|A(0)|A(1), do_fflush}, +{"for", Op_symbol, LEX_FOR, BREAK|CONTINUE, 0}, +{"func", Op_func, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0}, +{"function",Op_func, LEX_FUNCTION, NOT_OLD, 0}, +{"gensub", Op_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub}, +{"getline", Op_K_getline_redir, LEX_GETLINE, NOT_OLD, 0}, +{"gsub", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub}, +{"if", Op_K_if, LEX_IF, 0, 0}, +{"in", Op_symbol, LEX_IN, 0, 0}, +{"include", Op_symbol, LEX_INCLUDE, GAWKX, 0}, +{"index", Op_builtin, LEX_BUILTIN, A(2), do_index}, +{"int", Op_builtin, LEX_BUILTIN, A(1), do_int}, +{"length", Op_builtin, LEX_LENGTH, A(0)|A(1), do_length}, +{"log", Op_builtin, LEX_BUILTIN, A(1), do_log}, +{"lshift", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_lshift}, +{"match", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_match}, +{"mktime", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_mktime}, +{"next", Op_K_next, LEX_NEXT, 0, 0}, +{"nextfile", Op_K_nextfile, LEX_NEXTFILE, GAWKX, 0}, +{"or", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_or}, +{"patsplit", Op_builtin, LEX_BUILTIN, GAWKX|A(2)|A(3)|A(4), do_patsplit}, +{"print", Op_K_print, LEX_PRINT, 0, 0}, +{"printf", Op_K_printf, LEX_PRINTF, 0, 0}, +{"rand", Op_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand}, +{"return", Op_K_return, LEX_RETURN, NOT_OLD, 0}, +{"rshift", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_rshift}, +{"sin", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin}, +{"split", Op_builtin, LEX_BUILTIN, A(2)|A(3)|A(4), do_split}, +{"sprintf", Op_builtin, LEX_BUILTIN, 0, do_sprintf}, +{"sqrt", Op_builtin, LEX_BUILTIN, A(1), do_sqrt}, +{"srand", Op_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand}, #if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */ -{"stopme", Node_builtin, LEX_BUILTIN, GAWKX|A(0), stopme}, +{"stopme", Op_builtin, LEX_BUILTIN, GAWKX|A(0), stopme}, #endif -{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime}, -{"strtonum", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum}, -{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub}, -{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr}, -{"switch", Node_K_switch, LEX_SWITCH, GAWKX, 0}, -{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system}, -{"systime", Node_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime}, -{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower}, -{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper}, -{"while", Node_K_while, LEX_WHILE, 0, 0}, -{"xor", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_xor}, +{"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime}, +{"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum}, +{"sub", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub}, +{"substr", Op_builtin, LEX_BUILTIN, A(2)|A(3), do_substr}, +{"switch", Op_symbol, LEX_SWITCH, GAWKX|BREAK, 0}, +{"system", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system}, +{"systime", Op_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime}, +{"tolower", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower}, +{"toupper", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper}, +{"while", Op_symbol, LEX_WHILE, BREAK|CONTINUE, 0}, +{"xor", Op_builtin, LEX_BUILTIN, GAWKX|A(2), do_xor}, }; #ifdef MBS_SUPPORT @@ -1281,9 +1923,9 @@ static int cur_ring_idx; /* getfname --- return name of a builtin function (for pretty printing) */ const char * -getfname(register NODE *(*fptr)(NODE *)) +getfname(NODE *(*fptr)(int)) { - register int i, j; + int i, j; j = sizeof(tokentab) / sizeof(tokentab[0]); /* linear search, no other way to do it */ @@ -1302,22 +1944,44 @@ getfname(register NODE *(*fptr)(NODE *)) */ static void -#ifdef CAN_USE_STDARG_H - yyerror(const char *m, ...) -#else -/* VARARGS0 */ - yyerror(va_alist) - va_dcl -#endif +yyerror(const char *m, ...) { va_list args; const char *mesg = NULL; - register char *bp, *cp; + char *bp, *cp; char *scan; char *buf; int count; static char end_of_file_line[] = "(END OF FILE)"; char save; + int saveline; + SRCFILE *s; + + /* suppress current file name, line # from `.. included from ..' msgs */ + saveline = sourceline; + sourceline = 0; + + for (s = sourcefile; s->stype == SRC_INC; ) { + int line; + s = s->next; + if (s->fd <= INVALID_HANDLE) + continue; + + line = s->srclines; + /* if last token is NEWLINE, line number is off by 1. */ + if (s->lasttok == NEWLINE) + line--; + + msg("%s %s:%d%c", + s->prev == sourcefile ? "In file included from" + : " from", + (s->stype == SRC_INC || + s->stype == SRC_FILE) ? s->src : "cmd. line", + line, + s->stype == SRC_INC ? ',' : ':' + ); + } + sourceline = saveline; errcount++; /* Find the current line in the input file */ @@ -1357,15 +2021,9 @@ static void *bp = save; -#ifdef CAN_USE_STDARG_H va_start(args, m); if (mesg == NULL) mesg = m; -#else - va_start(args); - if (mesg == NULL) - mesg = va_arg(args, char *); -#endif count = (bp - thisline) + strlen(mesg) + 2 + 1; emalloc(buf, char *, count, "yyerror"); @@ -1384,7 +2042,333 @@ static void strcpy(bp, mesg); err("", buf, args); va_end(args); - free(buf); + efree(buf); +} + +/* mk_program --- create a single list of instructions */ + +static INSTRUCTION * +mk_program() +{ + INSTRUCTION *cp, *tmp; + +#define begin_block rule_block[BEGIN] +#define end_block rule_block[END] +#define prog_block rule_block[Rule] +#define beginfile_block rule_block[BEGINFILE] +#define endfile_block rule_block[ENDFILE] + + if (end_block == NULL) + end_block = list_create(ip_end); + else + (void) list_prepend(end_block, ip_end); + + if (get_context()->level > 0) { + if (begin_block != NULL && prog_block != NULL) + cp = list_merge(begin_block, prog_block); + else + cp = (begin_block != NULL) ? begin_block : prog_block; + + if (cp != NULL) + (void) list_merge(cp, end_block); + else + cp = end_block; + + (void) list_append(cp, instruction(Op_stop)); + goto out; + } + + if (endfile_block == NULL) + endfile_block = list_create(ip_endfile); + else { + extern int has_endfile; /* kludge for use in inrec (io.c) */ + has_endfile = TRUE; + (void) list_prepend(endfile_block, ip_endfile); + } + + if (beginfile_block == NULL) + beginfile_block = list_create(ip_beginfile); + else + (void) list_prepend(beginfile_block, ip_beginfile); + + if (prog_block == NULL) { + if (end_block->nexti == end_block->lasti + && beginfile_block->nexti == beginfile_block->lasti + && endfile_block->nexti == endfile_block->lasti + ) { + /* no pattern-action and (real) end, beginfile or endfile blocks */ + bcfree(ip_rec); + bcfree(ip_newfile); + ip_rec = ip_newfile = NULL; + + list_append(beginfile_block, instruction(Op_after_beginfile)); + (void) list_append(endfile_block, instruction(Op_after_endfile)); + + if (begin_block == NULL) /* no program at all */ + cp = end_block; + else + cp = list_merge(begin_block, end_block); + (void) list_append(cp, ip_atexit); + (void) list_append(cp, instruction(Op_stop)); + + /* append beginfile_block and endfile_block for sole use + * in getline without redirection (Op_K_getline). + */ + + (void) list_merge(cp, beginfile_block); + (void) list_merge(cp, endfile_block); + + goto out; + + } else { + /* install a do-nothing prog block */ + prog_block = list_create(instruction(Op_no_op)); + } + } + + (void) list_append(endfile_block, instruction(Op_after_endfile)); + (void) list_prepend(prog_block, ip_rec); + (void) list_append(prog_block, instruction(Op_jmp)); + prog_block->lasti->target_jmp = ip_rec; + + list_append(beginfile_block, instruction(Op_after_beginfile)); + + cp = list_merge(beginfile_block, prog_block); + (void) list_prepend(cp, ip_newfile); + (void) list_merge(cp, endfile_block); + (void) list_merge(cp, end_block); + if (begin_block != NULL) + cp = list_merge(begin_block, cp); + + (void) list_append(cp, ip_atexit); + (void) list_append(cp, instruction(Op_stop)); + +out: + /* delete the Op_list, not needed */ + tmp = cp->nexti; + bcfree(cp); + return tmp; + +#undef begin_block +#undef end_block +#undef prog_block +#undef beginfile_block +#undef endfile_block +} + +/* parse_program --- read in the program and convert into a list of instructions */ + +int +parse_program(INSTRUCTION **pcode) +{ + int ret; + + /* pre-create non-local jump targets + * ip_end (Op_no_op) -- used as jump target for `exit' + * outside an END block. + */ + ip_end = instruction(Op_no_op); + + if (get_context()->level > 0) + ip_newfile = ip_rec = ip_atexit = ip_beginfile = ip_endfile = NULL; + else { + ip_endfile = instruction(Op_no_op); + ip_beginfile = instruction(Op_no_op); + ip_newfile = instruction(Op_newfile); /* target for `nextfile' */ + ip_newfile->target_jmp = ip_end; + ip_newfile->target_endfile = ip_endfile; + ip_rec = instruction(Op_get_record); /* target for `next' */ + ip_atexit = instruction(Op_atexit); /* target for `exit' in END block */ + } + + sourcefile = srcfiles->next; + lexeof = FALSE; + lexptr = NULL; + lasttok = 0; + memset(rule_block, 0, sizeof(ruletab) * sizeof(INSTRUCTION *)); + errcount = 0; + tok = tokstart != NULL ? tokstart : tokexpand(); + + ret = yyparse(); + *pcode = mk_program(); + + /* avoid false source indications */ + source = NULL; + sourceline = 0; + + check_funcs(); + return (ret || errcount); +} + +/* do_add_srcfile --- add one item to srcfiles */ + +static SRCFILE * +do_add_srcfile(int stype, char *src, char *path, SRCFILE *thisfile) +{ + SRCFILE *s; + + emalloc(s, SRCFILE *, sizeof(SRCFILE), "do_add_srcfile"); + memset(s, 0, sizeof(SRCFILE)); + s->src = estrdup(src, strlen(src)); + s->fullpath = path; + s->stype = stype; + s->fd = INVALID_HANDLE; + s->next = thisfile; + s->prev = thisfile->prev; + thisfile->prev->next = s; + thisfile->prev = s; + return s; +} + +/* add_srcfile --- add one item to srcfiles after checking if + * a source file exists and not already in list. + */ + +SRCFILE * +add_srcfile(int stype, char *src, SRCFILE *thisfile, int *already_included, int *errcode) +{ + SRCFILE *s; + struct stat sbuf; + char *path; + int errno_val = 0; + + if (already_included) + *already_included = FALSE; + if (errcode) + *errcode = 0; + if (stype == SRC_CMDLINE || stype == SRC_STDIN) + return do_add_srcfile(stype, src, NULL, thisfile); + + path = find_source(src, &sbuf, &errno_val); + if (path == NULL) { + if (errcode) { + *errcode = errno_val; + return NULL; + } + fatal(_("can't open source file `%s' for reading (%s)"), + src, errno_val ? strerror(errno_val) : _("reason unknown")); + } + + for (s = srcfiles->next; s != srcfiles; s = s->next) { + if ((s->stype == SRC_FILE || s->stype == SRC_INC) + && files_are_same(& sbuf, & s->sbuf) + ) { + if (do_lint) + lintwarn(_("already included source file `%s'"), src); + efree(path); + if (already_included) + *already_included = TRUE; + return NULL; + } + } + + s = do_add_srcfile(stype, src, path, thisfile); + s->sbuf = sbuf; + s->mtime = sbuf.st_mtime; + return s; +} + +/* include_source --- read program from source included using `@include' */ + +static int +include_source(char *src) +{ + SRCFILE *s; + int errcode; + int already_included; + + if (do_traditional || do_posix) { + error(_("@include is a gawk extension")); + errcount++; + return -1; + } + + if (strlen(src) == 0) { + if (do_lint) + lintwarn(_("empty filename after @include")); + return 0; + } + + s = add_srcfile(SRC_INC, src, sourcefile, &already_included, &errcode); + if (s == NULL) { + if (already_included) + return 0; + error(_("can't open source file `%s' for reading (%s)"), + src, errcode ? strerror(errcode) : _("reason unknown")); + errcount++; + return -1; + } + + /* save scanner state for the current sourcefile */ + sourcefile->srclines = sourceline; + sourcefile->lexptr = lexptr; + sourcefile->lexend = lexend; + sourcefile->lexptr_begin = lexptr_begin; + sourcefile->lexeme = lexeme; + sourcefile->lasttok = lasttok; + + /* included file becomes the current source */ + sourcefile = s; + lexptr = NULL; + sourceline = 0; + source = NULL; + lasttok = 0; + lexeof = FALSE; + eof_warned = FALSE; + return 0; +} + +/* next_sourcefile --- read program from the next source in srcfiles */ + +static void +next_sourcefile() +{ + static int (*closefunc)(int fd) = NULL; + + if (closefunc == NULL) { + char *cp = getenv("AWKREADFUNC"); + + /* If necessary, one day, test value for different functions. */ + if (cp == NULL) + closefunc = close; + else + closefunc = one_line_close; + } + + assert(lexeof == TRUE); + lexeof = FALSE; + eof_warned = FALSE; + sourcefile->srclines = sourceline; /* total no of lines in current file */ + if (sourcefile->fd > INVALID_HANDLE) { + if (sourcefile->fd != fileno(stdin)) /* safety */ + (*closefunc)(sourcefile->fd); + sourcefile->fd = INVALID_HANDLE; + } + if (sourcefile->buf != NULL) { + efree(sourcefile->buf); + sourcefile->buf = NULL; + sourcefile->lexptr_begin = NULL; + } + + sourcefile = sourcefile->next; + if (sourcefile == srcfiles) + return; + + if (sourcefile->lexptr_begin != NULL) { + /* resume reading from already opened file (postponed to process '@include') */ + lexptr = sourcefile->lexptr; + lexend = sourcefile->lexend; + lasttok = sourcefile->lasttok; + lexptr_begin = sourcefile->lexptr_begin; + lexeme = sourcefile->lexeme; + sourceline = sourcefile->srclines; + source = sourcefile->src; + } else { + lexptr = NULL; + sourceline = 0; + source = NULL; + lasttok = 0; + } } /* get_src_buf --- read the next buffer of source program */ @@ -1392,11 +2376,11 @@ static void static char * get_src_buf() { - static int samefile = FALSE; - static int nextfile = 0; - static char *buf = NULL; - static size_t buflen = 0; - static int fd; + int n; + char *scan; + int newfile; + int savelen; + struct stat sbuf; /* * No argument prototype on readfunc on purpose, @@ -1404,105 +2388,94 @@ get_src_buf() * the types of arguments to read() aren't up to date. */ static ssize_t (*readfunc)() = 0; - static int (*closefunc)P((int fd)) = NULL; - - int n; - register char *scan; - int newfile; - struct stat sbuf; - int readcount = 0; - int l; - char *readloc; if (readfunc == NULL) { char *cp = getenv("AWKREADFUNC"); /* If necessary, one day, test value for different functions. */ - if (cp == NULL) { + if (cp == NULL) readfunc = read; - closefunc = close; - } else { + else readfunc = read_one_line; - closefunc = one_line_close; - } } -again: newfile = FALSE; - if (nextfile > numfiles) + if (sourcefile == srcfiles) return NULL; - if (srcfiles[nextfile].stype == CMDLINE) { - if ((l = strlen(srcfiles[nextfile].val)) == 0) { + if (sourcefile->stype == SRC_CMDLINE) { + if (sourcefile->bufsize == 0) { + sourcefile->bufsize = strlen(sourcefile->src); + lexptr = lexptr_begin = lexeme = sourcefile->src; + lexend = lexptr + sourcefile->bufsize; + sourceline = 1; + if (sourcefile->bufsize == 0) { + /* + * Yet Another Special case: + * gawk '' /path/name + * Sigh. + */ + static short warned = FALSE; + + if (do_lint && ! warned) { + warned = TRUE; + lintwarn(_("empty program text on command line")); + } + lexeof = TRUE; + } + } else if (sourcefile->buf == NULL && *(lexptr-1) != '\n') { /* - * Yet Another Special case: - * gawk '' /path/name - * Sigh. + * The following goop is to ensure that the source + * ends with a newline and that the entire current + * line is available for error messages. */ - static short warned = FALSE; + int offset; + char *buf; - if (do_lint && ! warned) { - warned = TRUE; - lintwarn(_("empty program text on command line")); - } - ++nextfile; - goto again; - } - if (srcfiles[nextfile].val[l-1] == '\n') { - /* has terminating newline, can use it directly */ - sourceline = 1; - source = NULL; - lexptr = lexptr_begin = srcfiles[nextfile].val; - /* fall through to pointer adjustment and return, below */ - } else { - /* copy it into static buffer */ - - /* make sure buffer exists and has room */ - if (buflen == 0) { - emalloc(buf, char *, l+2, "get_src_buf"); - buflen = l + 2; - } else if (l+2 > buflen) { - erealloc(buf, char *, l+2, "get_src_buf"); - buflen = l + 2; - } /* else - buffer has room, just use it */ - - /* copy in data */ - memcpy(buf, srcfiles[nextfile].val, l); - buf[l] = '\n'; - buf[++l] = '\0'; - - /* set vars and return */ - sourceline = 0; - source = NULL; - lexptr = lexptr_begin = buf; - } - lexend = lexptr + l; - nextfile++; /* for next entry to this routine */ + offset = lexptr - lexeme; + for (scan = lexeme; scan > lexptr_begin; scan--) + if (*scan == '\n') { + scan++; + break; + } + savelen = lexptr - scan; + emalloc(buf, char *, savelen + 1, "get_src_buf"); + memcpy(buf, scan, savelen); + thisline = buf; + lexptr = buf + savelen; + *lexptr = '\n'; + lexeme = lexptr - offset; + lexptr_begin = buf; + lexend = lexptr + 1; + sourcefile->buf = buf; + } else + lexeof = TRUE; return lexptr; } - if (! samefile) { - source = srcfiles[nextfile].val; - if (source == NULL) { /* read all the source files, all done */ - if (buf != NULL) { - free(buf); - buf = NULL; - } - buflen = 0; - return lexeme = lexptr = lexptr_begin = NULL; - } - fd = pathopen(source); + if (sourcefile->fd <= INVALID_HANDLE) { + int fd; + int l; + + source = sourcefile->src; + if (source == NULL) + return NULL; + fd = srcopen(sourcefile); if (fd <= INVALID_HANDLE) { char *in; /* suppress file name and line no. in error mesg */ in = source; source = NULL; - fatal(_("can't open source file `%s' for reading (%s)"), + error(_("can't open source file `%s' for reading (%s)"), in, strerror(errno)); + errcount++; + lexeof = TRUE; + return sourcefile->src; } - l = optimal_bufsize(fd, & sbuf); + + sourcefile->fd = fd; + l = optimal_bufsize(fd, &sbuf); /* * Make sure that something silly like * AWKBUFSIZE=8 make check @@ -1512,110 +2485,71 @@ again: if (l < A_DECENT_BUFFER_SIZE) l = A_DECENT_BUFFER_SIZE; #undef A_DECENT_BUFFER_SIZE - + sourcefile->bufsize = l; newfile = TRUE; - - /* make sure buffer exists and has room */ - if (buflen == 0) { - emalloc(buf, char *, l+2, "get_src_buf"); - buflen = l + 2; - } else if (l+2 > buflen) { - erealloc(buf, char *, l+2, "get_src_buf"); - buflen = l + 2; - } /* else - buffer has room, just use it */ - - readcount = l; - readloc = lexeme = lexptr = lexptr_begin = buf; - samefile = TRUE; + emalloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf"); + lexptr = lexptr_begin = lexeme = sourcefile->buf; + savelen = 0; sourceline = 1; + thisline = NULL; } else { /* - * In same file, ran off edge of buffer. - * Shift current line down to front, adjust - * pointers and fill in the rest of the buffer. + * Here, we retain the current source line in the beginning of the buffer. */ - - int lexeme_offset = lexeme - lexptr_begin; - int lexptr_offset = lexptr - lexptr_begin; - int lexend_offset = lexend - lexptr_begin; - - /* find beginning of current line */ - for (scan = lexeme; scan >= lexptr_begin; scan--) { + int offset; + for (scan = lexeme; scan > lexptr_begin; scan--) if (*scan == '\n') { scan++; break; } - } - /* - * This condition can be read as follows: IF - * 1. The beginning of the line is at the beginning of the - * buffer (no newline was found: scan <= buf) - * AND: - * 2. The start of valid lexical data is into the buffer - * (lexptr_begin > buf) - * OR: - * 3. We have scanned past the end of the last data read - * (lexptr == lexend) - * AND: - * 4. There's no room left in the buffer - * (lexptr_offset >= buflen - 2) - * - * If all that's true, grow the buffer to add more to - * the current line. - */ + savelen = lexptr - scan; + offset = lexptr - lexeme; - if (scan <= buf - && (lexptr_begin > buf - || (lexptr == lexend - && lexptr_offset >= buflen - 2))) { - /* have to grow the buffer */ - buflen *= 2; - erealloc(buf, char *, buflen, "get_src_buf"); - } else if (scan > buf) { - /* Line starts in middle of the buffer, shift things down. */ - memmove(buf, scan, lexend - scan); + if (savelen > 0) { /* - * make offsets relative to start of line, - * not start of buffer. + * Need to make sure we have room left for reading new text; + * grow the buffer (by doubling, an arbitrary choice), if the retained line + * takes up more than a certain percentage (50%, again an arbitrary figure) + * of the available space. */ - lexend_offset = lexend - scan; - lexeme_offset = lexeme - scan; - lexptr_offset = lexptr - scan; - } - - /* adjust pointers */ - lexeme = buf + lexeme_offset; - lexptr = buf + lexptr_offset; - lexend = buf + lexend_offset; - lexptr_begin = buf; - readcount = buflen - (lexend - buf); - readloc = lexend; - } - - /* add more data to buffer */ - n = (*readfunc)(fd, readloc, readcount); - if (n == -1) - fatal(_("can't read sourcefile `%s' (%s)"), - source, strerror(errno)); - if (n == 0) { - if (newfile) { - static short warned = FALSE; - if (do_lint && ! warned) { + if (savelen > sourcefile->bufsize / 2) { /* long line or token */ + sourcefile->bufsize *= 2; + erealloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf"); + scan = sourcefile->buf + (scan - lexptr_begin); + lexptr_begin = sourcefile->buf; + } + + thisline = lexptr_begin; + memmove(thisline, scan, savelen); + lexptr = thisline + savelen; + lexeme = lexptr - offset; + } else { + savelen = 0; + lexptr = lexeme = lexptr_begin; + thisline = NULL; + } + } + + n = (*readfunc)(sourcefile->fd, lexptr, sourcefile->bufsize - savelen); + if (n == -1) { + error(_("can't read sourcefile `%s' (%s)"), + source, strerror(errno)); + errcount++; + lexeof = TRUE; + } else { + lexend = lexptr + n; + if (n == 0) { + static short warned = FALSE; + if (do_lint && newfile && ! warned){ warned = TRUE; lintwarn(_("source file `%s' is empty"), source); } + lexeof = TRUE; } - if (fd != fileno(stdin)) /* safety */ - (*closefunc)(fd); - samefile = FALSE; - nextfile++; - goto again; } - lexend = lexptr + n; - return lexptr; + return sourcefile->buf; } /* tokadd --- add a character to the token buffer */ @@ -1624,20 +2558,23 @@ again: /* tokexpand --- grow the token buffer */ -char * +static char * tokexpand() { - static int toksize = 60; + static int toksize; int tokoffset; - - tokoffset = tok - tokstart; - toksize *= 2; - if (tokstart != NULL) + + if (tokstart != NULL) { + tokoffset = tok - tokstart; + toksize *= 2; erealloc(tokstart, char *, toksize, "tokexpand"); - else + tok = tokstart + tokoffset; + } else { + toksize = 60; emalloc(tokstart, char *, toksize, "tokexpand"); + tok = tokstart; + } tokend = tokstart + toksize; - tok = tokstart + tokoffset; return tok; } @@ -1649,9 +2586,13 @@ static int nextc(void) { if (gawk_mb_cur_max > 1) { - if (!lexptr || lexptr >= lexend) { - if (! get_src_buf()) - return EOF; +again: + if (lexeof) + return END_FILE; + if (lexptr == NULL || lexptr >= lexend) { + if (get_src_buf()) + goto again; + return END_SRC; } /* Update the buffer index. */ @@ -1664,7 +2605,7 @@ nextc(void) int idx, work_ring_idx = cur_ring_idx; mbstate_t tmp_state; size_t mbclen; - + for (idx = 0 ; lexptr + idx < lexend ; idx++) { tmp_state = cur_mbstate; mbclen = mbrlen(lexptr, idx + 1, &tmp_state); @@ -1695,44 +2636,30 @@ nextc(void) } return (int) (unsigned char) *lexptr++; - } - else { - int c; - - if (lexptr && lexptr < lexend) - c = (int) (unsigned char) *lexptr++; - else if (get_src_buf()) - c = (int) (unsigned char) *lexptr++; - else - c = EOF; - - return c; + } else { + do { + if (lexeof) + return END_FILE; + if (lexptr && lexptr < lexend) + return ((int) (unsigned char) *lexptr++); + } while (get_src_buf()); + return END_SRC; } } #else /* MBS_SUPPORT */ -#if GAWKDEBUG int -nextc(void) +nextc() { - int c; - - if (lexptr && lexptr < lexend) - c = (int) (unsigned char) *lexptr++; - else if (get_src_buf()) - c = (int) (unsigned char) *lexptr++; - else - c = EOF; - - return c; + do { + if (lexeof) + return END_FILE; + if (lexptr && lexptr < lexend) + return ((int) (unsigned char) *lexptr++); + } while (get_src_buf()); + return END_SRC; } -#else -#define nextc() ((lexptr && lexptr < lexend) ? \ - ((int) (unsigned char) *lexptr++) : \ - (get_src_buf() ? ((int) (unsigned char) *lexptr++) : EOF) \ - ) -#endif #endif /* MBS_SUPPORT */ @@ -1746,7 +2673,7 @@ pushback(void) cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 : cur_ring_idx - 1; #endif - (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr); + (! lexeof && lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr); } @@ -1759,13 +2686,17 @@ allow_newline(void) for (;;) { c = nextc(); - if (c == EOF) + if (c == END_FILE) { + pushback(); break; + } if (c == '#') { - while ((c = nextc()) != '\n' && c != EOF) + while ((c = nextc()) != '\n' && c != END_FILE) continue; - if (c == EOF) + if (c == END_FILE) { + pushback(); break; + } } if (c == '\n') sourceline++; @@ -1781,30 +2712,44 @@ allow_newline(void) static int yylex(void) { - register int c; + int c; int seen_e = FALSE; /* These are for numbers */ int seen_point = FALSE; int esc_seen; /* for literal strings */ int mid; static int did_newline = FALSE; char *tokkey; - static int lasttok = 0; - static short eof_warned = FALSE; int inhex = FALSE; int intlstr = FALSE; - if (nextc() == EOF) { - if (lasttok != NEWLINE) { - lasttok = NEWLINE; - if (do_lint && ! eof_warned) { - lintwarn(_("source file does not end in newline")); - eof_warned = TRUE; - } - return NEWLINE; /* fake it */ - } - return 0; +#define GET_INSTRUCTION(op) bcalloc(op, 1, sourceline) + + /* NB: a newline at end does not start a source line. */ + +#define NEWLINE_EOF \ + (lasttok != NEWLINE ? \ + (pushback(), do_lint && ! eof_warned && \ + (lintwarn(_("source file does not end in newline")), \ + eof_warned = TRUE), sourceline++, NEWLINE) : \ + (sourceline--, eof_warned = FALSE, LEX_EOF)) + + + yylval = (INSTRUCTION *) NULL; + if (lasttok == SUBSCRIPT) { + lasttok = 0; + return SUBSCRIPT; } + + if (lasttok == LEX_EOF) /* error earlier in current source, must give up !! */ + return 0; + + c = nextc(); + if (c == END_SRC) + return 0; + if (c == END_FILE) + return lasttok = NEWLINE_EOF; pushback(); + #if defined OS2 || defined __EMX__ /* * added for OS/2's extproc feature of cmd.exe @@ -1815,6 +2760,7 @@ yylex(void) lexptr++; } #endif + lexeme = lexptr; thisline = NULL; if (want_regexp) { @@ -1858,7 +2804,8 @@ yylex(void) in_brack--; break; case '\\': - if ((c = nextc()) == EOF) { + if ((c = nextc()) == END_FILE) { + pushback(); yyerror(_("unterminated regexp ends with `\\' at end of file")); goto end_regexp; /* kludge */ } else if (c == '\n') { @@ -1874,8 +2821,8 @@ yylex(void) if (in_brack > 0) break; end_regexp: - tokadd('\0'); - yylval.sval = tokstart; + yylval = GET_INSTRUCTION(Op_token); + yylval->lextok = estrdup(tokstart, tok - tokstart); if (do_lint) { int peek = nextc(); @@ -1896,7 +2843,8 @@ end_regexp: pushback(); yyerror(_("unterminated regexp")); goto end_regexp; /* kludge */ - case EOF: + case END_FILE: + pushback(); yyerror(_("unterminated regexp at end of file")); goto end_regexp; /* kludge */ } @@ -1912,42 +2860,32 @@ retry: lexeme = lexptr ? lexptr - 1 : lexptr; thisline = NULL; tok = tokstart; - yylval.nodetypeval = Node_illegal; - - if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) { - case EOF: - if (lasttok != NEWLINE) { - lasttok = NEWLINE; - if (do_lint && ! eof_warned) { - lintwarn(_("source file does not end in newline")); - eof_warned = TRUE; - } - return NEWLINE; /* fake it */ - } + +#ifdef MBS_SUPPORT + if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) +#endif + switch (c) { + case END_SRC: return 0; + case END_FILE: + return lasttok = NEWLINE_EOF; + case '\n': sourceline++; return lasttok = NEWLINE; case '#': /* it's a comment */ while ((c = nextc()) != '\n') { - if (c == EOF) { - if (lasttok != NEWLINE) { - lasttok = NEWLINE; - if (do_lint && ! eof_warned) { - lintwarn( - _("source file does not end in newline")); - eof_warned = TRUE; - } - return NEWLINE; /* fake it */ - } - return 0; - } + if (c == END_FILE) + return lasttok = NEWLINE_EOF; } sourceline++; return lasttok = NEWLINE; + case '@': + return lasttok = '@'; + case '\\': #ifdef RELAXED_CONTINUATION /* @@ -1969,23 +2907,27 @@ retry: _("use of `\\ #...' line continuation is not portable")); } while ((c = nextc()) != '\n') - if (c == EOF) + if (c == END_FILE) break; } pushback(); } #endif /* RELAXED_CONTINUATION */ - if (nextc() == '\n') { + c = nextc(); + if (c == '\r') /* allow MS-DOS files. bleah */ + c = nextc(); + if (c == '\n') { sourceline++; goto retry; } else { yyerror(_("backslash not last character on line")); - exit(EXIT_FAILURE); + return lasttok = LEX_EOF; } break; case ':': case '?': + yylval = GET_INSTRUCTION(Op_cond_exp); if (! do_posix) allow_newline(); return lasttok = c; @@ -2000,21 +2942,36 @@ retry: case '(': in_parens++; - /* FALL THROUGH */ + return lasttok = c; case '$': - case ';': + yylval = GET_INSTRUCTION(Op_field_spec); + return lasttok = c; case '{': + if (++in_braces == 1) + firstline = sourceline; + case ';': case ',': case '[': + return lasttok = c; case ']': - return lasttok = c; + c = nextc(); + pushback(); + if (c == '[') { + yylval = GET_INSTRUCTION(Op_sub_array); + lasttok = ']'; + } else { + yylval = GET_INSTRUCTION(Op_subscript); + lasttok = SUBSCRIPT; /* end of subscripts */ + } + return ']'; case '*': if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_assign_times; + yylval = GET_INSTRUCTION(Op_assign_times); return lasttok = ASSIGNOP; } else if (do_posix) { pushback(); + yylval = GET_INSTRUCTION(Op_times); return lasttok = '*'; } else if (c == '*') { /* make ** and **= aliases for ^ and ^= */ @@ -2028,7 +2985,7 @@ retry: if (do_lint_old) warning(_("old awk does not support operator `**='")); } - yylval.nodetypeval = Node_assign_exp; + yylval = GET_INSTRUCTION(Op_assign_exp); return ASSIGNOP; } else { pushback(); @@ -2039,10 +2996,12 @@ retry: if (do_lint_old) warning(_("old awk does not support operator `**'")); } + yylval = GET_INSTRUCTION(Op_exp); return lasttok = '^'; } } pushback(); + yylval = GET_INSTRUCTION(Op_times); return lasttok = '*'; case '/': @@ -2051,14 +3010,16 @@ retry: return lasttok = SLASH_BEFORE_EQUAL; } pushback(); + yylval = GET_INSTRUCTION(Op_quotient); return lasttok = '/'; case '%': if (nextc() == '=') { - yylval.nodetypeval = Node_assign_mod; + yylval = GET_INSTRUCTION(Op_assign_mod); return lasttok = ASSIGNOP; } pushback(); + yylval = GET_INSTRUCTION(Op_mod); return lasttok = '%'; case '^': @@ -2070,7 +3031,7 @@ retry: did_warn_assgn = TRUE; warning(_("operator `^=' is not supported in old awk")); } - yylval.nodetypeval = Node_assign_exp; + yylval = GET_INSTRUCTION(Op_assign_exp); return lasttok = ASSIGNOP; } pushback(); @@ -2078,67 +3039,74 @@ retry: did_warn_op = TRUE; warning(_("operator `^' is not supported in old awk")); } + yylval = GET_INSTRUCTION(Op_exp); return lasttok = '^'; } case '+': if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_assign_plus; + yylval = GET_INSTRUCTION(Op_assign_plus); return lasttok = ASSIGNOP; } - if (c == '+') + if (c == '+') { + yylval = GET_INSTRUCTION(Op_symbol); return lasttok = INCREMENT; + } pushback(); + yylval = GET_INSTRUCTION(Op_plus); return lasttok = '+'; case '!': if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_notequal; + yylval = GET_INSTRUCTION(Op_notequal); return lasttok = RELOP; } if (c == '~') { - yylval.nodetypeval = Node_nomatch; + yylval = GET_INSTRUCTION(Op_nomatch); return lasttok = MATCHOP; } pushback(); + yylval = GET_INSTRUCTION(Op_symbol); return lasttok = '!'; case '<': if (nextc() == '=') { - yylval.nodetypeval = Node_leq; + yylval = GET_INSTRUCTION(Op_leq); return lasttok = RELOP; } - yylval.nodetypeval = Node_less; + yylval = GET_INSTRUCTION(Op_less); pushback(); return lasttok = '<'; case '=': if (nextc() == '=') { - yylval.nodetypeval = Node_equal; + yylval = GET_INSTRUCTION(Op_equal); return lasttok = RELOP; } - yylval.nodetypeval = Node_assign; + yylval = GET_INSTRUCTION(Op_assign); pushback(); return lasttok = ASSIGN; case '>': if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_geq; + yylval = GET_INSTRUCTION(Op_geq); return lasttok = RELOP; } else if (c == '>') { - yylval.nodetypeval = Node_redirect_append; + yylval = GET_INSTRUCTION(Op_symbol); + yylval->redir_type = redirect_append; return lasttok = IO_OUT; } pushback(); if (in_print && in_parens == 0) { - yylval.nodetypeval = Node_redirect_output; + yylval = GET_INSTRUCTION(Op_symbol); + yylval->redir_type = redirect_output; return lasttok = IO_OUT; } - yylval.nodetypeval = Node_greater; + yylval = GET_INSTRUCTION(Op_greater); return lasttok = '>'; case '~': - yylval.nodetypeval = Node_match; + yylval = GET_INSTRUCTION(Op_match); return lasttok = MATCHOP; case '}': @@ -2148,6 +3116,8 @@ retry: */ if (did_newline) { did_newline = FALSE; + if (--in_braces == 0) + lastline = sourceline; return lasttok = c; } did_newline++; @@ -2161,7 +3131,7 @@ retry: if (c == '\n') { pushback(); yyerror(_("unterminated string")); - exit(EXIT_FAILURE); + return lasttok = LEX_EOF; } if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) && c == '\\') { @@ -2171,35 +3141,46 @@ retry: continue; } esc_seen = TRUE; - tokadd('\\'); + if (! want_source || c != '"') + tokadd('\\'); } - if (c == EOF) { + if (c == END_FILE) { pushback(); yyerror(_("unterminated string")); - exit(EXIT_FAILURE); + return lasttok = LEX_EOF; } tokadd(c); } - yylval.nodeval = make_str_node(tokstart, - tok - tokstart, esc_seen ? SCAN : 0); - yylval.nodeval->flags |= PERM; + yylval = GET_INSTRUCTION(Op_token); + if (want_source) { + yylval->lextok = estrdup(tokstart, tok - tokstart); + return lasttok = FILENAME; + } + + yylval->opcode = Op_push_i; + yylval->memory = make_str_node(tokstart, + tok - tokstart, esc_seen ? SCAN : 0); + yylval->memory->flags &= ~MALLOC; + yylval->memory->flags |= PERM; if (intlstr) { - yylval.nodeval->flags |= INTLSTR; + yylval->memory->flags |= INTLSTR; intlstr = FALSE; if (do_intl) - dumpintlstr(yylval.nodeval->stptr, - yylval.nodeval->stlen); - } + dumpintlstr(yylval->memory->stptr, yylval->memory->stlen); + } return lasttok = YSTRING; case '-': if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_assign_minus; + yylval = GET_INSTRUCTION(Op_assign_minus); return lasttok = ASSIGNOP; } - if (c == '-') + if (c == '-') { + yylval = GET_INSTRUCTION(Op_symbol); return lasttok = DECREMENT; + } pushback(); + yylval = GET_INSTRUCTION(Op_minus); return lasttok = '-'; case '.': @@ -2309,13 +3290,10 @@ retry: break; c = nextc(); } - if (c != EOF) - pushback(); - else if (do_lint && ! eof_warned) { - lintwarn(_("source file does not end in newline")); - eof_warned = TRUE; - } + pushback(); + tokadd('\0'); + yylval = GET_INSTRUCTION(Op_push_i); if (! do_traditional && isnondecimal(tokstart, FALSE)) { if (do_lint) { if (isdigit(tokstart[1])) /* not an 'x' or 'X' */ @@ -2325,48 +3303,47 @@ retry: lintwarn("numeric constant `%.*s' treated as hexadecimal", (int) strlen(tokstart)-1, tokstart); } - yylval.nodeval = make_number(nondec2awknum(tokstart, strlen(tokstart))); + yylval->memory = mk_number(nondec2awknum(tokstart, strlen(tokstart)), + PERM|NUMCUR|NUMBER); } else - yylval.nodeval = make_number(atof(tokstart)); - yylval.nodeval->flags |= PERM; + yylval->memory = mk_number(atof(tokstart), PERM|NUMCUR|NUMBER); return lasttok = YNUMBER; case '&': if ((c = nextc()) == '&') { - yylval.nodetypeval = Node_and; + yylval = GET_INSTRUCTION(Op_and); allow_newline(); return lasttok = LEX_AND; } pushback(); + yylval = GET_INSTRUCTION(Op_symbol); return lasttok = '&'; case '|': if ((c = nextc()) == '|') { - yylval.nodetypeval = Node_or; + yylval = GET_INSTRUCTION(Op_or); allow_newline(); return lasttok = LEX_OR; } else if (! do_traditional && c == '&') { - yylval.nodetypeval = Node_redirect_twoway; + yylval = GET_INSTRUCTION(Op_symbol); + yylval->redir_type = redirect_twoway; return lasttok = (in_print && in_parens == 0 ? IO_OUT : IO_IN); } pushback(); if (in_print && in_parens == 0) { - yylval.nodetypeval = Node_redirect_pipe; + yylval = GET_INSTRUCTION(Op_symbol); + yylval->redir_type = redirect_pipe; return lasttok = IO_OUT; } else { - yylval.nodetypeval = Node_redirect_pipein; + yylval = GET_INSTRUCTION(Op_symbol); + yylval->redir_type = redirect_pipein; return lasttok = IO_IN; } - - case '@': /* indirect function call */ - if (do_posix || do_traditional) - break; - return lasttok = c; } if (c != '_' && ! isalpha(c)) { yyerror(_("invalid char '%c' in expression"), c); - exit(EXIT_FAILURE); + return lasttok = LEX_EOF; } /* @@ -2394,23 +3371,21 @@ retry: /* it's some type of name-type-thing. Find its length. */ tok = tokstart; - while (is_identchar(c)) { + while (c != END_FILE && is_identchar(c)) { tokadd(c); c = nextc(); } tokadd('\0'); - emalloc(tokkey, char *, tok - tokstart, "yylex"); - memcpy(tokkey, tokstart, tok - tokstart); - if (c != EOF) - pushback(); - else if (do_lint && ! eof_warned) { - lintwarn(_("source file does not end in newline")); - eof_warned = TRUE; - } + pushback(); /* See if it is a special token. */ - if ((mid = check_special(tokstart)) >= 0) { + int class = tokentab[mid].class; + + if ((class == LEX_INCLUDE || class == LEX_EVAL) + && lasttok != '@') + goto out; + if (do_lint) { if (tokentab[mid].flags & GAWKX) lintwarn(_("`%s' is a gawk extension"), @@ -2425,26 +3400,64 @@ retry: if (do_lint_old && (tokentab[mid].flags & NOT_OLD)) warning(_("`%s' is not supported in old awk"), tokentab[mid].operator); + if (tokentab[mid].flags & BREAK) + break_allowed++; + if (tokentab[mid].flags & CONTINUE) + continue_allowed++; if ((do_traditional && (tokentab[mid].flags & GAWKX)) || (do_posix && (tokentab[mid].flags & NOT_POSIX))) - ; - else { - if (tokentab[mid].class == LEX_BUILTIN - || tokentab[mid].class == LEX_LENGTH) - yylval.lval = mid; - else - yylval.nodetypeval = tokentab[mid].value; - free(tokkey); - return lasttok = tokentab[mid].class; + goto out; + + switch (class) { + case LEX_INCLUDE: + want_source = TRUE; + break; + case LEX_EVAL: + if (get_context()->level == 0) + goto out; + emalloc(tokkey, char *, tok - tokstart + 1, "yylex"); + tokkey[0] = '@'; + memcpy(tokkey + 1, tokstart, tok - tokstart); + yylval = GET_INSTRUCTION(Op_token); + yylval->lextok = tokkey; + break; + + case LEX_FUNCTION: + case LEX_BEGIN: + case LEX_END: + case LEX_BEGINFILE: + case LEX_ENDFILE: + yylval = bcalloc(tokentab[mid].value, 3, sourceline); + break; + + case LEX_WHILE: + case LEX_DO: + case LEX_FOR: + case LEX_SWITCH: + yylval = bcalloc(tokentab[mid].value, + !!do_profiling + 1, sourceline); + break; + + default: + yylval = GET_INSTRUCTION(tokentab[mid].value); + if (class == LEX_BUILTIN || class == LEX_LENGTH) + yylval->builtin_idx = mid; + break; } + return lasttok = class; } - - yylval.sval = tokkey; - if (*lexptr == '(') +out: + tokkey = estrdup(tokstart, tok - tokstart); + if (*lexptr == '(') { + yylval = bcalloc(Op_token, 2, sourceline); + yylval->lextok = tokkey; return lasttok = FUNC_CALL; - else { + } else { static short goto_warned = FALSE; + yylval = GET_INSTRUCTION(Op_token); + yylval->lextok = tokkey; + #define SMART_ALECK 1 if (SMART_ALECK && do_lint && ! goto_warned && strcasecmp(tokkey, "goto") == 0) { @@ -2453,147 +3466,215 @@ retry: } return lasttok = NAME; } -} - -/* node_common --- common code for allocating a new node */ - -static NODE * -node_common(NODETYPE op) -{ - register NODE *r; - getnode(r); - r->type = op; - r->flags = MALLOC; - /* if lookahead is a NL, lineno is 1 too high */ - if (lexeme && lexeme >= lexptr_begin && *lexeme == '\n') - r->source_line = sourceline - 1; - else - r->source_line = sourceline; - r->source_file = source; - return r; +#undef GET_INSTRUCTION +#undef NEWLINE_EOF } -/* node --- allocates a node with defined lnode and rnode. */ +/* mk_symbol --- allocates a symbol for the symbol table. */ NODE * -node(NODE *left, NODETYPE op, NODE *right) +mk_symbol(NODETYPE type, NODE *value) { - register NODE *r; + NODE *r; - r = node_common(op); - r->lnode = left; - r->rnode = right; + getnode(r); + r->type = type; + r->flags = MALLOC; + r->lnode = value; + r->rnode = NULL; + r->var_assign = (Func_ptr) 0; return r; } -/* snode --- allocate a node with defined subnode and builtin for builtin - functions. Checks for arg. count and supplies defaults where - possible. */ +/* snode --- instructions for builtin functions. Checks for arg. count + and supplies defaults where possible. */ -static NODE * -snode(NODE *subn, NODETYPE op, int idx) +static INSTRUCTION * +snode(INSTRUCTION *subn, INSTRUCTION *r) { - register NODE *r; - register NODE *n; + INSTRUCTION *arg; + INSTRUCTION *ip; + NODE *n; int nexp = 0; int args_allowed; + int idx = r->builtin_idx; + + if (subn != NULL) { + INSTRUCTION *tp; + for (tp = subn->nexti; tp; tp = tp->nexti) { + /* assert(tp->opcode == Op_list); */ + tp = tp->lasti; + nexp++; + } + assert(nexp > 0); + } - r = node_common(op); - - /* traverse expression list to see how many args. given */ - for (n = subn; n != NULL; n = n->rnode) { - nexp++; - if (nexp > 5) - break; - } + r->builtin = tokentab[idx].ptr; /* check against how many args. are allowed for this builtin */ args_allowed = tokentab[idx].flags & ARGS; - if (args_allowed && (args_allowed & A(nexp)) == 0) - fatal(_("%d is invalid as number of arguments for %s"), + if (args_allowed && (args_allowed & A(nexp)) == 0) { + yyerror(_("%d is invalid as number of arguments for %s"), nexp, tokentab[idx].operator); - - r->builtin = tokentab[idx].ptr; + return NULL; + } /* special case processing for a few builtins */ - if (nexp == 0 && r->builtin == do_length) { - subn = node(node(make_number(0.0), Node_field_spec, (NODE *) NULL), - Node_expression_list, - (NODE *) NULL); + if (r->builtin == do_length) { + if (nexp == 0) { + INSTRUCTION *list; /* no args. Use $0 */ + + r->expr_count = 1; + list = list_create(r); + (void) list_prepend(list, instruction(Op_field_spec)); + (void) list_prepend(list, instruction(Op_push_i)); + list->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); + return list; + } } else if (r->builtin == do_match) { static short warned = FALSE; - if (subn->rnode->lnode->type != Node_regex) - subn->rnode->lnode = mk_rexp(subn->rnode->lnode); + arg = subn->nexti->lasti->nexti; /* 2nd arg list */ + (void) mk_rexp(arg); - if (subn->rnode->rnode != NULL) { /* 3rd argument there */ + if (nexp == 3) { /* 3rd argument there */ if (do_lint && ! warned) { warned = TRUE; lintwarn(_("match: third argument is a gawk extension")); } - if (do_traditional) - fatal(_("match: third argument is a gawk extension")); + if (do_traditional) { + yyerror(_("match: third argument is a gawk extension")); + return NULL; + } + + arg = arg->lasti->nexti; /* third arg list */ + ip = arg->lasti; + if (/*ip == arg->nexti && */ ip->opcode == Op_push) + ip->opcode = Op_push_array; } } else if (r->builtin == do_sub || r->builtin == do_gsub) { - if (subn->lnode->type != Node_regex) - subn->lnode = mk_rexp(subn->lnode); - if (nexp == 2) - append_right(subn, node(node(make_number(0.0), - Node_field_spec, - (NODE *) NULL), - Node_expression_list, - (NODE *) NULL)); - else if (subn->rnode->rnode->lnode->type == Node_val) { + int literal = FALSE; + + arg = subn->nexti; /* first arg list */ + (void) mk_rexp(arg); + + arg = arg->lasti->nexti; /* 2nd arg list */ + if (nexp == 2) { + INSTRUCTION *expr; + expr = list_create(instruction(Op_push_i)); + expr->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); + (void) mk_expression_list(subn, + list_append(expr, instruction(Op_field_spec))); + } + + arg = arg->lasti->nexti; /* third arg list */ + ip = arg->lasti; + if (ip->opcode == Op_push_i) { if (do_lint) lintwarn(_("%s: string literal as last arg of substitute has no effect"), (r->builtin == do_sub) ? "sub" : "gsub"); - } else if (! isassignable(subn->rnode->rnode->lnode)) { - yyerror(_("%s third parameter is not a changeable object"), - (r->builtin == do_sub) ? "sub" : "gsub"); + literal = TRUE; + } else { + if (make_assignable(ip) == NULL) + yyerror(_("%s third parameter is not a changeable object"), + (r->builtin == do_sub) ? "sub" : "gsub"); + else + ip->do_reference = TRUE; + } + + /* kludge: This is one of the few cases + * when we need to know the type of item on stack. + * In case of string literal as the last argument, + * pass 4 as # of args (See sub_common code in builtin.c). + * Other cases like length(array or scalar) seem + * to work out ok. + */ + + r->expr_count = count_expressions(&subn, FALSE) + !!literal; + ip = subn->lasti; + + (void) list_append(subn, r); + + /* add after_assign bytecode(s) */ + if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) { + (void) list_append(subn, instruction(Op_var_assign)); + subn->lasti->memory = ip->memory; + } else if (ip->opcode == Op_field_spec_lhs) { + (void) list_append(subn, instruction(Op_field_assign)); + subn->lasti->field_assign = (Func_ptr) 0; + ip->target_assign = subn->lasti; } + return subn; } else if (r->builtin == do_gensub) { - if (subn->lnode->type != Node_regex) - subn->lnode = mk_rexp(subn->lnode); - if (nexp == 3) - append_right(subn, node(node(make_number(0.0), - Node_field_spec, - (NODE *) NULL), - Node_expression_list, - (NODE *) NULL)); + if (nexp == 3) { + arg = subn->nexti->lasti->nexti->lasti->nexti; /* 3rd arg list */ + ip = instruction(Op_push_i); + ip->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); + (void) mk_expression_list(subn, + list_append(list_create(ip), + instruction(Op_field_spec))); + } + arg = subn->nexti; /* first arg list */ + (void) mk_rexp(arg); } else if (r->builtin == do_split) { + arg = subn->nexti->lasti->nexti; /* 2nd arg list */ + ip = arg->lasti; + if (ip->opcode == Op_push) + ip->opcode = Op_push_array; + if (nexp == 2) { + INSTRUCTION *expr; + expr = list_create(instruction(Op_push)); + expr->nexti->memory = FS_node; + (void) mk_expression_list(subn, expr); + } + arg = arg->lasti->nexti; + n = mk_rexp(arg); if (nexp == 2) - append_right(subn, - node(FS_node, Node_expression_list, (NODE *) NULL)); - n = subn->rnode->rnode->lnode; - if (n->type != Node_regex) - subn->rnode->rnode->lnode = mk_rexp(n); - if (nexp == 2) - subn->rnode->rnode->lnode->re_flags |= FS_DFLT; + n->re_flags |= FS_DFLT; + if (nexp == 4) { + arg = arg->lasti->nexti; + ip = arg->lasti; + if (ip->opcode == Op_push) + ip->opcode = Op_push_array; + } } else if (r->builtin == do_patsplit) { - if (nexp == 2) - append_right(subn, - node(FPAT_node, Node_expression_list, (NODE *) NULL)); - n = subn->rnode->rnode->lnode; - if (n->type != Node_regex) - subn->rnode->rnode->lnode = mk_rexp(n); + arg = subn->nexti->lasti->nexti; /* 2nd arg list */ + ip = arg->lasti; + if (ip->opcode == Op_push) + ip->opcode = Op_push_array; + if (nexp == 2) { + INSTRUCTION *expr; + expr = list_create(instruction(Op_push)); + expr->nexti->memory = FPAT_node; + (void) mk_expression_list(subn, expr); + } + arg = arg->lasti->nexti; + n = mk_rexp(arg); + if (nexp == 4) { + arg = arg->lasti->nexti; + ip = arg->lasti; + if (ip->opcode == Op_push) + ip->opcode = Op_push_array; + } } else if (r->builtin == do_close) { static short warned = FALSE; - - if ( nexp == 2) { - if (do_lint && nexp == 2 && ! warned) { + if (nexp == 2) { + if (do_lint && ! warned) { warned = TRUE; lintwarn(_("close: second argument is a gawk extension")); } - if (do_traditional) - fatal(_("close: second argument is a gawk extension")); + if (do_traditional) { + yyerror(_("close: second argument is a gawk extension")); + return NULL; + } } } else if (do_intl /* --gen-po */ && r->builtin == do_dcgettext /* dcgettext(...) */ - && subn->lnode->type == Node_val /* 1st arg is constant */ - && (subn->lnode->flags & STRCUR) != 0) { /* it's a string constant */ + && subn->nexti->lasti->opcode == Op_push_i /* 1st arg is constant */ + && (subn->nexti->lasti->memory->flags & STRCUR) != 0) { /* it's a string constant */ /* ala xgettext, dcgettext("some string" ...) dumps the string */ - NODE *str = subn->lnode; + NODE *str = subn->nexti->lasti->memory; if ((str->flags & INTLSTR) != 0) warning(_("use of dcgettext(_\"...\") is incorrect: remove leading underscore")); @@ -2602,45 +3683,65 @@ snode(NODE *subn, NODETYPE op, int idx) dumpintlstr(str->stptr, str->stlen); } else if (do_intl /* --gen-po */ && r->builtin == do_dcngettext /* dcngettext(...) */ - && subn->lnode->type == Node_val /* 1st arg is constant */ - && (subn->lnode->flags & STRCUR) != 0 /* it's a string constant */ - && subn->rnode->lnode->type == Node_val /* 2nd arg is constant too */ - && (subn->rnode->lnode->flags & STRCUR) != 0) { /* it's a string constant */ + && subn->nexti->lasti->opcode == Op_push_i /* 1st arg is constant */ + && (subn->nexti->lasti->memory->flags & STRCUR) != 0 /* it's a string constant */ + && subn->nexti->lasti->nexti->lasti->opcode == Op_push_i /* 2nd arg is constant too */ + && (subn->nexti->lasti->nexti->lasti->memory->flags & STRCUR) != 0) { /* it's a string constant */ /* ala xgettext, dcngettext("some string", "some plural" ...) dumps the string */ - NODE *str1 = subn->lnode; - NODE *str2 = subn->rnode->lnode; + NODE *str1 = subn->nexti->lasti->memory; + NODE *str2 = subn->nexti->lasti->nexti->lasti->memory; if (((str1->flags | str2->flags) & INTLSTR) != 0) warning(_("use of dcngettext(_\"...\") is incorrect: remove leading underscore")); else dumpintlstr2(str1->stptr, str1->stlen, str2->stptr, str2->stlen); + } else if (r->builtin == do_asort || r->builtin == do_asorti) { + arg = subn->nexti; /* 1st arg list */ + ip = arg->lasti; + if (/* ip == arg->nexti && */ ip->opcode == Op_push) + ip->opcode = Op_push_array; + if (nexp == 2) { + arg = ip->nexti; + ip = arg->lasti; + if (/* ip == arg->nexti && */ ip->opcode == Op_push) + ip->opcode = Op_push_array; + } } +#ifdef ARRAYDEBUG + else if (r->builtin == do_adump) { + ip = subn->nexti->lasti; + if (ip->opcode == Op_push) + ip->opcode = Op_push_array; + } +#endif - r->subnode = subn; - if (r->builtin == do_sprintf) { - count_args(r); - if (r->lnode != NULL) /* r->lnode set from subn. guard against syntax errors & check it's valid */ - r->lnode->printf_count = r->printf_count; /* hack */ + if (subn != NULL) { + r->expr_count = count_expressions(&subn, FALSE); + return list_append(subn, r); } - return r; + + r->expr_count = 0; + return list_create(r); } -/* make_for_loop --- build a for loop */ +/* append_param --- append PNAME to the list of parameters + * for the current function. + */ -static NODE * -make_for_loop(NODE *init, NODE *cond, NODE *incr) +static void +append_param(char *pname) { - register FOR_LOOP_HEADER *r; - NODE *n; + static NODE *savetail = NULL; + NODE *p; - emalloc(r, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop"); - getnode(n); - n->type = Node_illegal; - r->init = init; - r->cond = cond; - r->incr = incr; - n->sub.nodep.r.hd = r; - return n; + p = make_param(pname); + if (func_params == NULL) { + func_params = p; + savetail = p; + } else if (savetail != NULL) { + savetail->rnode = p; + savetail = p; + } } /* dup_parms --- return TRUE if there are duplicate parameters */ @@ -2648,7 +3749,7 @@ make_for_loop(NODE *init, NODE *cond, NODE *incr) static int dup_parms(NODE *func) { - register NODE *np; + NODE *np; const char *fname, **names; int count, i, j, dups; NODE *params; @@ -2671,7 +3772,7 @@ dup_parms(NODE *func) i = 0; for (np = params; np != NULL; np = np->rnode) { if (np->param == NULL) { /* error earlier, give up, go home */ - free(names); + efree(names); return TRUE; } names[i++] = np->param; @@ -2684,36 +3785,45 @@ dup_parms(NODE *func) dups++; error( _("function `%s': parameter #%d, `%s', duplicates parameter #%d"), - fname, i+1, names[j], j+1); + fname, i + 1, names[j], j+1); } } } - free(names); + efree(names); return (dups > 0 ? TRUE : FALSE); } /* parms_shadow --- check if parameters shadow globals */ static int -parms_shadow(const char *fname, NODE *func) +parms_shadow(INSTRUCTION *pc, int *shadow) { - int count, i; + int pcount, i; int ret = FALSE; + NODE *func; + char *fname; + func = pc->func_body; + fname = func->lnode->param; + +#if 0 /* can't happen, already exited if error ? */ if (fname == NULL || func == NULL) /* error earlier */ return FALSE; +#endif - count = func->lnode->param_cnt; + pcount = func->lnode->param_cnt; - if (count == 0) /* no args, no problem */ - return FALSE; + if (pcount == 0) /* no args, no problem */ + return 0; + source = pc->source_file; + sourceline = pc->source_line; /* * Use warning() and not lintwarn() so that can warn * about all shadowed parameters. */ - for (i = 0; i < count; i++) { + for (i = 0; i < pcount; i++) { if (lookup(func->parmlist[i]) != NULL) { warning( _("function `%s': parameter `%s' shadows global variable"), @@ -2722,21 +3832,27 @@ parms_shadow(const char *fname, NODE *func) } } - return ret; + *shadow |= ret; + return 0; } + /* - * install: + * install_symbol: * Install a name in the symbol table, even if it is already there. * Caller must check against redefinition if that is desired. */ + NODE * -install(char *name, NODE *value) +install_symbol(char *name, NODE *value) { - register NODE *hp; - register size_t len; - register int bucket; + NODE *hp; + size_t len; + int bucket; + + if (install_func) + (*install_func)(name); var_count++; len = strlen(name); @@ -2752,27 +3868,26 @@ install(char *name, NODE *value) return hp->hvalue; } -/* lookup --- find the most recent hash node for name installed by install */ +/* lookup --- find the most recent hash node for name installed by install_symbol */ NODE * lookup(const char *name) { - register NODE *bucket; - register size_t len; + NODE *bucket; + size_t len; len = strlen(name); for (bucket = variables[hash(name, len, (unsigned long) HASHSIZE, NULL)]; bucket != NULL; bucket = bucket->hnext) if (bucket->hlength == len && STREQN(bucket->hname, name, len)) return bucket->hvalue; - return NULL; } -/* var_comp --- compare two variable names */ +/* sym_comp --- compare two symbol (variable or function) names */ static int -var_comp(const void *v1, const void *v2) +sym_comp(const void *v1, const void *v2) { const NODE *const *npp1, *const *npp2; const NODE *n1, *n2; @@ -2793,77 +3908,90 @@ var_comp(const void *v1, const void *v2) /* valinfo --- dump var info */ -static void -valinfo(NODE *n, FILE *fp) +void +valinfo(NODE *n, int (*print_func)(FILE *, const char *, ...), FILE *fp) { - if (n->flags & STRING) { - fprintf(fp, "string ("); - pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE); - fprintf(fp, ")\n"); + if (n == Nnull_string) + print_func(fp, "uninitialized scalar\n"); + else if (n->flags & STRING) { + pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', FALSE); + print_func(fp, "\n"); } else if (n->flags & NUMBER) - fprintf(fp, "number (%.17g)\n", n->numbr); + print_func(fp, "%.17g\n", n->numbr); else if (n->flags & STRCUR) { - fprintf(fp, "string value ("); - pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE); - fprintf(fp, ")\n"); + pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', FALSE); + print_func(fp, "\n"); } else if (n->flags & NUMCUR) - fprintf(fp, "number value (%.17g)\n", n->numbr); + print_func(fp, "%.17g\n", n->numbr); else - fprintf(fp, "?? flags %s\n", flags2str(n->flags)); + print_func(fp, "?? flags %s\n", flags2str(n->flags)); } +/* get_varlist --- list of global variables */ -/* dump_vars --- dump the symbol table */ - -void -dump_vars(const char *fname) +NODE ** +get_varlist() { int i, j; NODE **table; NODE *p; - FILE *fp; - - emalloc(table, NODE **, var_count * sizeof(NODE *), "dump_vars"); - - if (fname == NULL) - fp = stderr; - else if ((fp = fopen(fname, "w")) == NULL) { - warning(_("could not open `%s' for writing (%s)"), fname, strerror(errno)); - warning(_("sending profile to standard error")); - fp = stderr; - } + emalloc(table, NODE **, (var_count + 1) * sizeof(NODE *), "get_varlist"); + update_global_values(); for (i = j = 0; i < HASHSIZE; i++) for (p = variables[i]; p != NULL; p = p->hnext) table[j++] = p; - assert(j == var_count); /* Shazzam! */ - qsort(table, j, sizeof(NODE *), var_comp); + qsort(table, j, sizeof(NODE *), sym_comp); + + table[j] = NULL; + return table; +} - for (i = 0; i < j; i++) { - p = table[i]; +/* print_vars --- print names and values of global variables */ + +void +print_vars(int (*print_func)(FILE *, const char *, ...), FILE *fp) +{ + int i; + NODE **table; + NODE *p; + + table = get_varlist(); + for (i = 0; (p = table[i]) != NULL; i++) { if (p->hvalue->type == Node_func) continue; - fprintf(fp, "%.*s: ", (int) p->hlength, p->hname); + print_func(fp, "%.*s: ", (int) p->hlength, p->hname); if (p->hvalue->type == Node_var_array) - fprintf(fp, "array, %ld elements\n", p->hvalue->table_size); + print_func(fp, "array, %ld elements\n", p->hvalue->table_size); else if (p->hvalue->type == Node_var_new) - fprintf(fp, "unused variable\n"); + print_func(fp, "untyped variable\n"); else if (p->hvalue->type == Node_var) - valinfo(p->hvalue->var_value, fp); - else { - NODE **lhs = get_lhs(p->hvalue, NULL, FALSE); + valinfo(p->hvalue->var_value, print_func, fp); + } + efree(table); +} - valinfo(*lhs, fp); - } +/* dump_vars --- dump the symbol table */ + +void +dump_vars(const char *fname) +{ + FILE *fp; + + if (fname == NULL) + fp = stderr; + else if ((fp = fopen(fname, "w")) == NULL) { + warning(_("could not open `%s' for writing (%s)"), fname, strerror(errno)); + warning(_("sending profile to standard error")); + fp = stderr; } + print_vars(fprintf, fp); if (fp != stderr && fclose(fp) != 0) warning(_("%s: close failed (%s)"), fname, strerror(errno)); - - free(table); } /* release_all_vars --- free all variable memory */ @@ -2873,8 +4001,8 @@ release_all_vars() { int i; NODE *p, *next; - - for (i = 0; i < HASHSIZE; i++) + + for (i = 0; i < HASHSIZE; i++) { for (p = variables[i]; p != NULL; p = next) { next = p->hnext; @@ -2882,40 +4010,14 @@ release_all_vars() continue; else if (p->hvalue->type == Node_var_array) assoc_clear(p->hvalue); - else if (p->hvalue->type != Node_var_new) { - NODE **lhs = get_lhs(p->hvalue, NULL, FALSE); - - unref(*lhs); - } - unref(p); - } -} - -/* finfo --- for use in comparison and sorting of function names */ - -struct finfo { - const char *name; - size_t nlen; - NODE *func; -}; - -/* fcompare --- comparison function for qsort */ - -static int -fcompare(const void *p1, const void *p2) -{ - const struct finfo *f1, *f2; - int minlen; + else if (p->hvalue->type != Node_var_new) + unref(p->hvalue->var_value); - f1 = (const struct finfo *) p1; - f2 = (const struct finfo *) p2; - - if (f1->nlen > f2->nlen) - minlen = f2->nlen; - else - minlen = f1->nlen; - - return strncmp(f1->name, f2->name, minlen); + efree(p->hname); + freenode(p->hvalue); + freenode(p); + } + } } /* dump_funcs --- print all functions */ @@ -2923,48 +4025,10 @@ fcompare(const void *p1, const void *p2) void dump_funcs() { - int i, j; - NODE *p; - struct finfo *tab = NULL; - - /* - * Walk through symbol table countng functions. - * Could be more than func_count if there are - * extension functions. - */ - for (i = j = 0; i < HASHSIZE; i++) { - for (p = variables[i]; p != NULL; p = p->hnext) { - if (p->hvalue->type == Node_func) { - j++; - } - } - } - - if (j == 0) + if (func_count <= 0) return; - emalloc(tab, struct finfo *, j * sizeof(struct finfo), "dump_funcs"); - - /* now walk again, copying info */ - for (i = j = 0; i < HASHSIZE; i++) { - for (p = variables[i]; p != NULL; p = p->hnext) { - if (p->hvalue->type == Node_func) { - tab[j].name = p->hname; - tab[j].nlen = p->hlength; - tab[j].func = p->hvalue; - j++; - } - } - } - - - /* Shazzam! */ - qsort(tab, j, sizeof(struct finfo), fcompare); - - for (i = 0; i < j; i++) - pp_func(tab[i].name, tab[i].nlen, tab[i].func); - - free(tab); + (void) foreach_func((int (*)(INSTRUCTION *, void *)) pp_func, TRUE, (void *) 0); } /* shadow_funcs --- check all functions for parameters that shadow globals */ @@ -2972,40 +4036,16 @@ dump_funcs() void shadow_funcs() { - int i, j; - NODE *p; - struct finfo *tab; static int calls = 0; int shadow = FALSE; - if (func_count == 0) + if (func_count <= 0) return; if (calls++ != 0) fatal(_("shadow_funcs() called twice!")); - emalloc(tab, struct finfo *, func_count * sizeof(struct finfo), "shadow_funcs"); - - for (i = j = 0; i < HASHSIZE; i++) { - for (p = variables[i]; p != NULL; p = p->hnext) { - if (p->hvalue->type == Node_func) { - tab[j].name = p->hname; - tab[j].nlen = p->hlength; - tab[j].func = p->hvalue; - j++; - } - } - } - - assert(j == func_count); - - /* Shazzam! */ - qsort(tab, func_count, sizeof(struct finfo), fcompare); - - for (i = 0; i < j; i++) - shadow |= parms_shadow(tab[i].name, tab[i].func); - - free(tab); + (void) foreach_func((int (*)(INSTRUCTION *, void *)) parms_shadow, TRUE, &shadow); /* End with fatal if the user requested it. */ if (shadow && lintfunc != warning) @@ -3013,54 +4053,6 @@ shadow_funcs() } /* - * append_right: - * Add new to the rightmost branch of LIST. This uses n^2 time, so we make - * a simple attempt at optimizing it. - */ - -static NODE * -append_right(NODE *list, NODE *new) -{ - register NODE *oldlist; - static NODE *savefront = NULL, *savetail = NULL; - - if (list == NULL || new == NULL) - return list; - - oldlist = list; - if (savefront == oldlist) - list = savetail; /* Be careful: maybe list->rnode != NULL */ - else - savefront = oldlist; - - while (list->rnode != NULL) - list = list->rnode; - savetail = list->rnode = new; - return oldlist; -} - -/* - * append_pattern: - * A wrapper around append_right, used for rule lists. - */ -static inline NODE * -append_pattern(NODE **list, NODE *patt) -{ - NODE *n = node(patt, Node_rule_node, (NODE *) NULL); - - if (*list == NULL) - *list = n; - else { - NODE *n1 = node(n, Node_rule_list, (NODE *) NULL); - if ((*list)->type != Node_rule_list) - *list = node(*list, Node_rule_list, n1); - else - (void) append_right(*list, n1); - } - return n; -} - -/* * func_install: * check if name is already installed; if so, it had better have Null value, * in which case def is added as the value. Otherwise, install name with def @@ -3071,92 +4063,117 @@ append_pattern(NODE **list, NODE *patt) * of each function parameter during a function call. See eval.c. */ -static void -func_install(NODE *params, NODE *def) +static int +func_install(INSTRUCTION *func, INSTRUCTION *def) { - NODE *r, *n, *thisfunc; - char **pnames, *names, *sp; - size_t pcount = 0, space = 0; + NODE *params; + NODE *r, *n, *thisfunc, *hp; + char **pnames = NULL; + char *fname; + int pcount = 0; int i; + params = func_params; + /* check for function foo(foo) { ... }. bleah. */ for (n = params->rnode; n != NULL; n = n->rnode) { - if (strcmp(n->param, params->param) == 0) - fatal(_("function `%s': can't use function name as parameter name"), - params->param); - else if (is_std_var(n->param)) - fatal(_("function `%s': can't use special variable `%s' as a function parameter"), + if (strcmp(n->param, params->param) == 0) { + error(_("function `%s': can't use function name as parameter name"), + params->param); + errcount++; + return -1; + } else if (is_std_var(n->param)) { + error(_("function `%s': can't use special variable `%s' as a function parameter"), params->param, n->param); + errcount++; + return -1; + } } - thisfunc = NULL; /* turn off warnings */ + thisfunc = NULL; /* turn off warnings */ - /* symbol table managment */ - pop_var(params, FALSE); - r = lookup(params->param); + fname = params->param; + /* symbol table management */ + hp = remove_symbol(params->param); /* remove function name out of symbol table */ + if (hp != NULL) + freenode(hp); + r = lookup(fname); if (r != NULL) { - fatal(_("function name `%s' previously defined"), params->param); - } else if (params->param == builtin_func) /* not a valid function name */ + error(_("function name `%s' previously defined"), fname); + errcount++; + return -1; + } else if (fname == builtin_func) /* not a valid function name */ goto remove_params; + /* add an implicit return at end; + * also used by 'return' command in debugger + */ + + (void) list_append(def, instruction(Op_push_i)); + def->lasti->memory = Nnull_string; + (void) list_append(def, instruction(Op_K_return)); + + if (do_profiling) + (void) list_prepend(def, instruction(Op_exec_count)); + + /* func->opcode is Op_func */ + (func + 1)->firsti = def->nexti; + (func + 1)->lasti = def->lasti; + (func + 2)->first_line = func->source_line; + (func + 2)->last_line = lastline; + + func->nexti = def->nexti; + bcfree(def); + + (void) list_append(rule_list, func + 1); /* debugging */ + /* install the function */ - thisfunc = node(params, Node_func, def); - (void) install(params->param, thisfunc); + thisfunc = mk_symbol(Node_func, params); + (void) install_symbol(fname, thisfunc); + thisfunc->code_ptr = func; + func->func_body = thisfunc; - /* figure out amount of space to allocate for variable names */ - for (n = params->rnode; n != NULL; n = n->rnode) { + for (n = params->rnode; n != NULL; n = n->rnode) pcount++; - space += strlen(n->param) + 1; - } - /* allocate it and fill it in */ if (pcount != 0) { - emalloc(names, char *, space, "func_install"); - emalloc(pnames, char **, pcount * sizeof(char *), "func_install"); - sp = names; - for (i = 0, n = params->rnode; i < pcount; i++, n = n->rnode) { - pnames[i] = sp; - strcpy(sp, n->param); - sp += strlen(n->param) + 1; - } - thisfunc->parmlist = pnames; - } else { - thisfunc->parmlist = NULL; + emalloc(pnames, char **, (pcount + 1) * sizeof(char *), "func_install"); + for (i = 0, n = params->rnode; i < pcount; i++, n = n->rnode) + pnames[i] = n->param; + pnames[pcount] = NULL; } + thisfunc->parmlist = pnames; /* update lint table info */ - func_use(params->param, FUNC_DEFINE); + func_use(fname, FUNC_DEFINE); - func_count++; /* used by profiling / pretty printer */ + func_count++; /* used in profiler / pretty printer */ remove_params: /* remove params from symbol table */ pop_params(params->rnode); + return 0; } -/* pop_var --- remove a variable from the symbol table */ +/* remove_symbol --- remove a variable from the symbol table */ -static void -pop_var(NODE *np, int freeit) +NODE * +remove_symbol(char *name) { - register NODE *bucket, **save; - register size_t len; - char *name; + NODE *bucket, **save; + size_t len; - name = np->param; len = strlen(name); save = &(variables[hash(name, len, (unsigned long) HASHSIZE, NULL)]); for (bucket = *save; bucket != NULL; bucket = bucket->hnext) { if (len == bucket->hlength && STREQN(bucket->hname, name, len)) { var_count--; *save = bucket->hnext; - freenode(bucket); - if (freeit) - free(np->param); - return; + return bucket; } save = &(bucket->hnext); } + return NULL; } /* pop_params --- remove list of function parameters from symbol table */ @@ -3168,10 +4185,13 @@ pop_var(NODE *np, int freeit) static void pop_params(NODE *params) { + NODE *hp; if (params == NULL) return; pop_params(params->rnode); - pop_var(params, TRUE); + hp = remove_symbol(params->param); + if (hp != NULL) + freenode(hp); } /* make_param --- make NAME into a function parameter */ @@ -3184,9 +4204,8 @@ make_param(char *name) getnode(r); r->type = Node_param_list; r->rnode = NULL; - r->param = name; r->param_cnt = param_counter++; - return (install(name, r)); + return (install_symbol(name, r)); } static struct fdesc { @@ -3240,6 +4259,9 @@ check_funcs() struct fdesc *fp, *next; int i; + if (get_context()->level > 0) + goto free_mem; + for (i = 0; i < HASHSIZE; i++) { for (fp = ftable[i]; fp != NULL; fp = fp->next) { #ifdef REALLYMEAN @@ -3261,29 +4283,98 @@ check_funcs() } } +free_mem: /* now let's free all the memory */ for (i = 0; i < HASHSIZE; i++) { for (fp = ftable[i]; fp != NULL; fp = next) { next = fp->next; - free(fp->name); - free(fp); + efree(fp->name); + efree(fp); } + ftable[i] = NULL; } } /* param_sanity --- look for parameters that are regexp constants */ static void -param_sanity(NODE *arglist) +param_sanity(INSTRUCTION *arglist) { - NODE *argp, *arg; - int i; + INSTRUCTION *argl, *arg; + int i = 1; - for (i = 1, argp = arglist; argp != NULL; argp = argp->rnode, i++) { - arg = argp->lnode; - if (arg->type == Node_regex) + if (arglist == NULL) + return; + for (argl = arglist->nexti; argl; ) { + arg = argl->lasti; + if (arg->opcode == Op_match_rec) warning(_("regexp constant for parameter #%d yields boolean value"), i); + argl = arg->nexti; + i++; + } +} + +/* foreach_func --- execute given function for each awk function in symbol table. */ + +int +foreach_func(int (*pfunc)(INSTRUCTION *, void *), int sort, void *data) +{ + int i, j; + NODE *p; + int ret = 0; + + if (sort) { + NODE **tab; + + /* + * Walk through symbol table counting functions. + * Could be more than func_count if there are + * extension functions. + */ + for (i = j = 0; i < HASHSIZE; i++) { + for (p = variables[i]; p != NULL; p = p->hnext) { + if (p->hvalue->type == Node_func) { + j++; + } + } + } + + if (j == 0) + return 0; + + emalloc(tab, NODE **, j * sizeof(NODE *), "foreach_func"); + + /* now walk again, copying info */ + for (i = j = 0; i < HASHSIZE; i++) { + for (p = variables[i]; p != NULL; p = p->hnext) { + if (p->hvalue->type == Node_func) { + tab[j] = p; + j++; + } + } + } + + /* Shazzam! */ + qsort(tab, j, sizeof(NODE *), sym_comp); + + for (i = 0; i < j; i++) { + if ((ret = pfunc(tab[i]->hvalue->code_ptr, data)) != 0) + break; + } + + efree(tab); + return ret; + } + + /* unsorted */ + for (i = 0; i < HASHSIZE; i++) { + for (p = variables[i]; p != NULL; p = p->hnext) { + if (p->hvalue->type == Node_func + && (ret = pfunc(p->hvalue->code_ptr, data)) != 0) + return ret; + } } + return 0; } /* deferred variables --- those that are only defined if needed. */ @@ -3320,33 +4411,31 @@ register_deferred_variable(const char *name, NODE *(*load_func)(void)) /* variable --- make sure NAME is in the symbol table */ NODE * -variable(char *name, int can_free, NODETYPE type) +variable(char *name, NODETYPE type) { - register NODE *r; + NODE *r; if ((r = lookup(name)) != NULL) { - if (r->type == Node_func) - fatal(_("function `%s' called with space between name and `(',\nor used as a variable or an array"), + if (r->type == Node_func) { + error(_("function `%s' called with space between name and `(',\nor used as a variable or an array"), r->vname); - + errcount++; + r->type = Node_var_new; /* continue parsing instead of exiting */ + } } else { /* not found */ struct deferred_variable *dv; for (dv = deferred_variables; TRUE; dv = dv->next) { if (dv == NULL) { - /* - * This is the only case in which we may not - * free the string. - */ - NODE *n; - - if (type == Node_var_array) - n = node(NULL, type, NULL); + /* + * This is the only case in which we may not free the string. + */ + if (type == Node_var) + r = mk_symbol(type, Nnull_string); else - n = node(Nnull_string, type, NULL); - - return install(name, n); + r = mk_symbol(type, (NODE *) NULL); + return install_symbol(name, r); } if (STREQ(name, dv->name)) { r = (*dv->load_func)(); @@ -3354,31 +4443,55 @@ variable(char *name, int can_free, NODETYPE type) } } } - if (can_free) - free(name); + efree(name); return r; } -/* mk_rexp --- make a regular expression constant */ +/* make_regnode --- make a regular expression node */ static NODE * -mk_rexp(NODE *exp) +make_regnode(int type, NODE *exp) { NODE *n; - if (exp->type == Node_regex) - return exp; - getnode(n); - n->type = Node_dynregex; - n->re_exp = exp; - n->re_text = NULL; - n->re_reg = NULL; - n->re_flags = 0; + memset(n, 0, sizeof(NODE)); + n->type = type; n->re_cnt = 1; + + if (type == Node_regex) { + n->re_reg = make_regexp(exp->stptr, exp->stlen, FALSE, TRUE, FALSE); + if (n->re_reg == NULL) { + freenode(n); + return NULL; + } + n->re_exp = exp; + n->re_flags = CONSTANT; + } return n; } + +/* mk_rexp --- make a regular expression constant */ + +static NODE * +mk_rexp(INSTRUCTION *list) +{ + INSTRUCTION *ip; + + ip = list->nexti; + if (ip == list->lasti && ip->opcode == Op_match_rec) + ip->opcode = Op_push_re; + else { + ip = instruction(Op_push_re); + ip->memory = make_regnode(Node_dynregex, NULL); + ip->nexti = list->lasti->nexti; + list->lasti->nexti = ip; + list->lasti = ip; + } + return ip->memory; +} + /* isnoeffect --- when used as a statement, has no side effects */ /* @@ -3389,48 +4502,38 @@ mk_rexp(NODE *exp) */ static int -isnoeffect(NODETYPE type) +isnoeffect(OPCODE type) { switch (type) { - case Node_times: - case Node_quotient: - case Node_mod: - case Node_plus: - case Node_minus: - case Node_subscript: - case Node_concat: - case Node_exp: - case Node_unary_minus: - case Node_field_spec: - case Node_and: - case Node_or: - case Node_equal: - case Node_notequal: - case Node_less: - case Node_greater: - case Node_leq: - case Node_geq: - case Node_match: - case Node_nomatch: - case Node_not: - case Node_val: - case Node_in_array: - case Node_NF: - case Node_NR: - case Node_FNR: - case Node_FPAT: - case Node_FS: - case Node_RS: - case Node_FIELDWIDTHS: - case Node_IGNORECASE: - case Node_OFS: - case Node_ORS: - case Node_OFMT: - case Node_CONVFMT: - case Node_BINMODE: - case Node_LINT: - case Node_SUBSEP: - case Node_TEXTDOMAIN: + case Op_times: + case Op_times_i: + case Op_quotient: + case Op_quotient_i: + case Op_mod: + case Op_mod_i: + case Op_plus: + case Op_plus_i: + case Op_minus: + case Op_minus_i: + case Op_subscript: + case Op_concat: + case Op_exp: + case Op_exp_i: + case Op_unary_minus: + case Op_field_spec: + case Op_and_final: + case Op_or_final: + case Op_equal: + case Op_notequal: + case Op_less: + case Op_greater: + case Op_leq: + case Op_geq: + case Op_match: + case Op_nomatch: + case Op_match_rec: + case Op_not: + case Op_in_array: return TRUE; default: break; /* keeps gcc -Wall happy */ @@ -3439,45 +4542,34 @@ isnoeffect(NODETYPE type) return FALSE; } -/* isassignable --- can this node be assigned to? */ +/* make_assignable --- make this operand an assignable one if posiible */ -static int -isassignable(register NODE *n) +static INSTRUCTION * +make_assignable(INSTRUCTION *ip) { - switch (n->type) { - case Node_var_new: - case Node_var: - case Node_FIELDWIDTHS: - case Node_RS: - case Node_FS: - case Node_FNR: - case Node_FPAT: - case Node_NR: - case Node_NF: - case Node_IGNORECASE: - case Node_OFMT: - case Node_CONVFMT: - case Node_ORS: - case Node_OFS: - case Node_LINT: - case Node_BINMODE: - case Node_SUBSEP: - case Node_TEXTDOMAIN: - case Node_field_spec: - case Node_subscript: - return TRUE; - case Node_param_list: - return ((n->flags & FUNC) == 0); /* ok if not func name */ + switch (ip->opcode) { + case Op_push: + if (ip->memory->type == Node_param_list + && (ip->memory->flags & FUNC) != 0) + return NULL; + ip->opcode = Op_push_lhs; + return ip; + case Op_field_spec: + ip->opcode = Op_field_spec_lhs; + return ip; + case Op_subscript: + ip->opcode = Op_subscript_lhs; + return ip; default: break; /* keeps gcc -Wall happy */ } - return FALSE; + return NULL; } /* stopme --- for debugging */ NODE * -stopme(NODE *tree ATTRIBUTE_UNUSED) +stopme(int nargs ATTRIBUTE_UNUSED) { return (NODE *) 0; } @@ -3499,7 +4591,7 @@ dumpintlstr(const char *str, size_t len) } printf("msgid "); - pp_string_fp(stdout, str, len, '"', TRUE); + pp_string_fp(fprintf, stdout, str, len, '"', TRUE); putchar('\n'); printf("msgstr \"\"\n\n"); fflush(stdout); @@ -3522,35 +4614,15 @@ dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2) } printf("msgid "); - pp_string_fp(stdout, str1, len1, '"', TRUE); + pp_string_fp(fprintf, stdout, str1, len1, '"', TRUE); putchar('\n'); printf("msgid_plural "); - pp_string_fp(stdout, str2, len2, '"', TRUE); + pp_string_fp(fprintf, stdout, str2, len2, '"', TRUE); putchar('\n'); printf("msgstr[0] \"\"\nmsgstr[1] \"\"\n\n"); fflush(stdout); } -/* count_args --- count the number of printf arguments */ - -static void -count_args(NODE *tree) -{ - size_t count = 0; - NODE *save_tree; - - assert(tree->type == Node_K_printf - || (tree->type == Node_builtin && tree->builtin == do_sprintf)); - save_tree = tree; - - tree = tree->lnode; /* printf format string */ - - for (count = 0; tree != NULL; tree = tree->rnode) - count++; - - save_tree->printf_count = count; -} - /* isarray --- can this type be subscripted? */ static int @@ -3572,6 +4644,1194 @@ isarray(NODE *n) return FALSE; } + +static INSTRUCTION * +mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op) +{ + INSTRUCTION *ip,*ip1; + AWKNUM res; + + ip = s2->nexti; + if (s2->lasti == ip && ip->opcode == Op_push_i) { + /* do any numeric constant folding */ + ip1 = s1->nexti; + if (ip1->memory != NULL && ip1->memory->type == Node_val + && (ip1->memory->flags & (STRCUR|STRING)) == 0 + && ip->memory != NULL && ip->memory->type == Node_val + && (ip->memory->flags & (STRCUR|STRING)) == 0 + && ip1 == s1->lasti && do_optimize > 1) { + ip1->memory->numbr = force_number(ip1->memory); + ip->memory->numbr = force_number(ip->memory); + res = ip1->memory->numbr; + switch (op->opcode) { + case Op_times: + res *= ip->memory->numbr; + break; + case Op_quotient: + if (ip->memory->numbr == 0) { + /* don't fatalize, allow parsing rest of the input */ + yyerror(_("division by zero attempted")); + goto regular; + } + + res /= ip->memory->numbr; + break; + case Op_mod: + if (ip->memory->numbr == 0) { + /* don't fatalize, allow parsing rest of the input */ + yyerror(_("division by zero attempted in `%%'")); + goto regular; + } +#ifdef HAVE_FMOD + res = fmod(res, ip->memory->numbr); +#else /* ! HAVE_FMOD */ + (void) modf(res / ip->memory->numbr, &res); + res = ip1->memory->numbr - res * ip->memory->numbr; +#endif /* ! HAVE_FMOD */ + break; + case Op_plus: + res += ip->memory->numbr; + break; + case Op_minus: + res -= ip->memory->numbr; + break; + case Op_exp: + res = calc_exp(res, ip->memory->numbr); + break; + default: + goto regular; + } + + op->opcode = Op_push_i; + op->memory = mk_number(res, (PERM|NUMCUR|NUMBER)); + bcfree(ip1); + bcfree(ip); + bcfree(s1); + bcfree(s2); + return list_create(op); + } else { + /* do basic arithmetic optimisation */ + /* convert (Op_push_i Node_val) + (Op_plus) to (Op_plus_i Node_val) */ + switch (op->opcode) { + case Op_times: + op->opcode = Op_times_i; + break; + case Op_quotient: + op->opcode = Op_quotient_i; + break; + case Op_mod: + op->opcode = Op_mod_i; + break; + case Op_plus: + op->opcode = Op_plus_i; + break; + case Op_minus: + op->opcode = Op_minus_i; + break; + case Op_exp: + op->opcode = Op_exp_i; + break; + default: + goto regular; + } + + op->memory = ip->memory; + bcfree(ip); + bcfree(s2); /* Op_list */ + return list_append(s1, op); + } + } + +regular: + /* append lists s1, s2 and add `op' bytecode */ + (void) list_merge(s1, s2); + return list_append(s1, op); +} + +/* mk_boolean --- instructions for boolean and, or */ + +static INSTRUCTION * +mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op) +{ + INSTRUCTION *tp; + OPCODE opc, final_opc; + + opc = op->opcode; /* Op_and or Op_or */ + final_opc = (opc == Op_or) ? Op_or_final : Op_and_final; + + add_lint(right, LINT_assign_in_cond); + + tp = left->lasti; + + if (tp->opcode != final_opc) { /* x || y */ + list_append(right, instruction(final_opc)); + add_lint(left, LINT_assign_in_cond); + (void) list_append(left, op); + left->lasti->target_jmp = right->lasti; + + /* NB: target_stmt points to previous Op_and(Op_or) in a chain; + * target_stmt only used in the parser (see below). + */ + + left->lasti->target_stmt = left->lasti; + right->lasti->target_stmt = left->lasti; + } else { /* optimization for x || y || z || ... */ + INSTRUCTION *ip; + + op->opcode = final_opc; + (void) list_append(right, op); + op->target_stmt = tp; + tp->opcode = opc; + tp->target_jmp = op; + + /* update jump targets */ + for (ip = tp->target_stmt; ; ip = ip->target_stmt) { + assert(ip->opcode == opc); + assert(ip->target_jmp == tp); + /* if (ip->opcode == opc && ip->target_jmp == tp) */ + ip->target_jmp = op; + if (ip->target_stmt == ip) + break; + } + } + + return list_merge(left, right); +} + +/* mk_condition --- if-else and conditional */ + +static INSTRUCTION * +mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch, + INSTRUCTION *elsep, INSTRUCTION *false_branch) +{ + /* + * ---------------- + * cond + * ---------------- + * t: [Op_jmp_false f ] + * ---------------- + * true_branch + * + * ---------------- + * [Op_jmp y] + * ---------------- + * f: + * false_branch + * ---------------- + * y: [Op_no_op] + * ---------------- + */ + + INSTRUCTION *ip; + + /* FIXME else { } -- add elsep */ + + if (false_branch == NULL) { + if (elsep != NULL) /* else { } */ + false_branch = list_append(list_create(elsep), instruction(Op_no_op)); + else + false_branch = list_create(instruction(Op_no_op)); + } else { + /* assert(elsep != NULL); */ + + /* avoid a series of no_op's: if .. else if .. else if .. */ + if (false_branch->lasti->opcode != Op_no_op) + (void) list_append(false_branch, instruction(Op_no_op)); + (void) list_prepend(false_branch, elsep); + false_branch->nexti->branch_end = false_branch->lasti; + if (do_profiling) + (void) list_prepend(false_branch, instruction(Op_exec_count)); + } + + (void) list_prepend(false_branch, instruction(Op_jmp)); + false_branch->nexti->target_jmp = false_branch->lasti; + + add_lint(cond, LINT_assign_in_cond); + ip = list_append(cond, instruction(Op_jmp_false)); + ip->lasti->target_jmp = false_branch->nexti->nexti; + + (void) list_prepend(ip, ifp); + if (do_profiling) { + (void) list_append(ip, instruction(Op_exec_count)); + ip->nexti->branch_if = ip->lasti; + ip->nexti->branch_else = false_branch->nexti; + } + + if (true_branch != NULL) + list_merge(ip, true_branch); + return list_merge(ip, false_branch); +} + +enum defline { FIRST_LINE, LAST_LINE }; + +/* find_line -- find the first(last) line in a list of (pattern) instructions */ + +static int +find_line(INSTRUCTION *pattern, enum defline what) +{ + INSTRUCTION *ip; + int lineno = 0; + + for (ip = pattern->nexti; ip; ip = ip->nexti) { + if (what == LAST_LINE) { + if (ip->source_line > lineno) + lineno = ip->source_line; + } else { /* FIRST_LINE */ + if (ip->source_line > 0 + && (lineno == 0 || ip->source_line < lineno)) + lineno = ip->source_line; + } + if (ip == pattern->lasti) + break; + } + assert(lineno > 0); + return lineno; +} + +/* append_rule --- pattern-action instructions */ + +static INSTRUCTION * +append_rule(INSTRUCTION *pattern, INSTRUCTION *action) +{ + /* + * ---------------- + * pattern + * ---------------- + * [Op_jmp_false f ] + * ---------------- + * action + * ---------------- + * f: [Op_no_op ] + * ---------------- + */ + + INSTRUCTION *rp; + INSTRUCTION *tp; + INSTRUCTION *ip; + + if (rule != Rule) { + rp = pattern; + if (do_profiling) + (void) list_append(action, instruction(Op_no_op)); + (rp + 1)->firsti = action->nexti; + (rp + 1)->lasti = action->lasti; + (rp + 2)->first_line = pattern->source_line; + (rp + 2)->last_line = lastline; + ip = list_prepend(action, rp); + + } else { + rp = bcalloc(Op_rule, 3, 0); + rp->in_rule = Rule; + rp->source_file = source; + tp = instruction(Op_no_op); + + if (pattern == NULL) { + /* assert(action != NULL); */ + if (do_profiling) + (void) list_prepend(action, instruction(Op_exec_count)); + (rp + 1)->firsti = action->nexti; + (rp + 1)->lasti = tp; + (rp + 2)->first_line = firstline; + (rp + 2)->last_line = lastline; + rp->source_line = firstline; + ip = list_prepend(list_append(action, tp), rp); + } else { + (void) list_append(pattern, instruction(Op_jmp_false)); + pattern->lasti->target_jmp = tp; + (rp + 2)->first_line = find_line(pattern, FIRST_LINE); + rp->source_line = (rp + 2)->first_line; + if (action == NULL) { + (rp + 2)->last_line = find_line(pattern, LAST_LINE); + action = list_create(instruction(Op_K_print_rec)); + if (do_profiling) + (void) list_prepend(action, instruction(Op_exec_count)); + } else + (rp + 2)->last_line = lastline; + + if (do_profiling) { + (void) list_prepend(pattern, instruction(Op_exec_count)); + (void) list_prepend(action, instruction(Op_exec_count)); + } + (rp + 1)->firsti = action->nexti; + (rp + 1)->lasti = tp; + ip = list_append( + list_merge(list_prepend(pattern, rp), + action), + tp); + } + + } + + list_append(rule_list, rp + 1); + + if (rule_block[rule] == NULL) + rule_block[rule] = ip; + else + (void) list_merge(rule_block[rule], ip); + + return rule_block[rule]; +} + +/* mk_assignment --- assignment bytecodes */ + +static INSTRUCTION * +mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op) +{ + INSTRUCTION *tp; + INSTRUCTION *ip; + + tp = lhs->lasti; + switch (tp->opcode) { + case Op_field_spec: + tp->opcode = Op_field_spec_lhs; + break; + case Op_subscript: + tp->opcode = Op_subscript_lhs; + break; + case Op_push: + case Op_push_array: + tp->opcode = Op_push_lhs; + break; + default: + cant_happen(); + } + + tp->do_reference = (op->opcode != Op_assign); /* check for uninitialized reference */ + + if (rhs != NULL) + ip = list_merge(rhs, lhs); + else + ip = lhs; + + (void) list_append(ip, op); + + if (tp->opcode == Op_push_lhs + && tp->memory->type == Node_var + && tp->memory->var_assign + ) { + tp->do_reference = FALSE; /* no uninitialized reference checking + * for a special variable. + */ + (void) list_append(ip, instruction(Op_var_assign)); + ip->lasti->memory = tp->memory; + } else if (tp->opcode == Op_field_spec_lhs) { + (void) list_append(ip, instruction(Op_field_assign)); + ip->lasti->field_assign = (Func_ptr) 0; + tp->target_assign = ip->lasti; + } + + return ip; +} + +/* optimize_assignment --- peephole optimization for assignment */ + +static INSTRUCTION * +optimize_assignment(INSTRUCTION *exp) +{ + INSTRUCTION *i1; + INSTRUCTION *i2; + INSTRUCTION *i3; + + /* + * Optimize assignment statements array[subs] = x; var = x; $n = x; + * string concatenation of the form s = s t. + * + * 1) Array element assignment array[subs] = x: + * Replaces Op_push_array + Op_subscript_lhs + Op_assign + Op_pop + * with single instruction Op_store_sub. + * Limitation (FIXME): 1 dimension and sub is simple var/value. + * + * 2) Simple variable assignment var = x: + * Replaces Op_push_lhs + Op_assign + Op_pop with Op_store_var. + * + * 3) Field assignment $n = x: + * Replaces Op_field_spec_lhs + Op_assign + Op_field_assign + Op_pop + * with Op_store_field. + * + * 4) Optimization for string concatenation: + * For cases like x = x y, uses realloc to include y in x; + * also eliminates instructions Op_push_lhs and Op_pop. + */ + + /* + * N.B.: do not append Op_pop instruction to the returned + * instruction list if optimized. None of these + * optimized instructions push the r-value of assignment + * onto the runtime stack. + */ + + i2 = NULL; + i1 = exp->lasti; + + if ( ! do_optimize + || ( i1->opcode != Op_assign + && i1->opcode != Op_field_assign) + ) + return list_append(exp, instruction(Op_pop)); + + for (i2 = exp->nexti; i2 != i1; i2 = i2->nexti) { + switch (i2->opcode) { + case Op_concat: + if (i2->nexti->opcode == Op_push_lhs /* l.h.s is a simple variable */ + && (i2->concat_flag & CSVAR) /* 1st exp in r.h.s is a simple variable; + * see Op_concat in the grammer above. + */ + && i2->nexti->memory == exp->nexti->memory /* and the same as in l.h.s */ + && i2->nexti->nexti == i1 + && i1->opcode == Op_assign + ) { + /* s = s ... optimization */ + + /* avoid stuff like x = x (x = y) or x = x gsub(/./, "b", x); + * check for l-value reference to this variable in the r.h.s. + * Also avoid function calls in general, to guard against + * global variable assignment. + */ + + for (i3 = exp->nexti->nexti; i3 != i2; i3 = i3->nexti) { + if ((i3->opcode == Op_push_lhs && i3->memory == i2->nexti->memory) + || i3->opcode == Op_func_call) + return list_append(exp, instruction(Op_pop)); /* no optimization */ + } + + /* remove the variable from r.h.s */ + i3 = exp->nexti; + exp->nexti = i3->nexti; + bcfree(i3); + + if (--i2->expr_count == 1) /* one less expression in Op_concat */ + i2->opcode = Op_no_op; + + i3 = i2->nexti; + assert(i3->opcode == Op_push_lhs); + i3->opcode = Op_assign_concat; /* change Op_push_lhs to Op_assign_concat */ + i3->nexti = NULL; + bcfree(i1); /* Op_assign */ + exp->lasti = i3; /* update Op_list */ + return exp; + } + break; + + case Op_field_spec_lhs: + if (i2->nexti->opcode == Op_assign + && i2->nexti->nexti == i1 + && i1->opcode == Op_field_assign + ) { + /* $n = .. */ + i2->opcode = Op_store_field; + bcfree(i2->nexti); /* Op_assign */ + i2->nexti = NULL; + bcfree(i1); /* Op_field_assign */ + exp->lasti = i2; /* update Op_list */ + return exp; + } + break; + + case Op_push_array: + if (i2->nexti->nexti->opcode == Op_subscript_lhs) { + i3 = i2->nexti->nexti; + if (i3->sub_count == 1 + && i3->nexti == i1 + && i1->opcode == Op_assign + ) { + /* array[sub] = .. */ + i3->opcode = Op_store_sub; + i3->memory = i2->memory; + i3->expr_count = 1; /* sub_count shadows memory, + * so use expr_count instead. + */ + i3->nexti = NULL; + i2->opcode = Op_no_op; + bcfree(i1); /* Op_assign */ + exp->lasti = i3; /* update Op_list */ + return exp; + } + } + break; + + case Op_push_lhs: + if (i2->nexti == i1 + && i1->opcode == Op_assign + ) { + /* var = .. */ + i2->opcode = Op_store_var; + i2->nexti = NULL; + bcfree(i1); /* Op_assign */ + exp->lasti = i2; /* update Op_list */ + return exp; + } + break; + + default: + break; + } + } + + /* no optimization */ + return list_append(exp, instruction(Op_pop)); +} + + +/* mk_getline --- make instructions for getline */ + +static INSTRUCTION * +mk_getline(INSTRUCTION *op, INSTRUCTION *var, INSTRUCTION *redir, OPCODE redirtype) +{ + INSTRUCTION *ip; + INSTRUCTION *tp; + INSTRUCTION *asgn = NULL; + + /* + * getline [var] < [file] + * + * [ file (simp_exp)] + * [ [ var ] ] + * [ Op_K_getline_redir|NULL|redir_type|into_var] + * [ [var_assign] ] + * + */ + + if (redir == NULL) { + int sline = op->source_line; + bcfree(op); + op = bcalloc(Op_K_getline, 2, sline); + (op + 1)->target_endfile = ip_endfile; + (op + 1)->target_beginfile = ip_beginfile; + } + + if (var != NULL) { + tp = make_assignable(var->lasti); + assert(tp != NULL); + + /* check if we need after_assign bytecode */ + if (tp->opcode == Op_push_lhs + && tp->memory->type == Node_var + && tp->memory->var_assign + ) { + asgn = instruction(Op_var_assign); + asgn->memory = tp->memory; + } else if (tp->opcode == Op_field_spec_lhs) { + asgn = instruction(Op_field_assign); + asgn->field_assign = (Func_ptr) 0; /* determined at run time */ + tp->target_assign = asgn; + } + if (redir != NULL) { + ip = list_merge(redir, var); + (void) list_append(ip, op); + } else + ip = list_append(var, op); + } else if (redir != NULL) + ip = list_append(redir, op); + else + ip = list_create(op); + op->into_var = (var != NULL); + op->redir_type = (redir != NULL) ? redirtype : 0; + + return (asgn == NULL ? ip : list_append(ip, asgn)); +} + + +/* mk_for_loop --- for loop bytecodes */ + +static INSTRUCTION * +mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond, + INSTRUCTION *incr, INSTRUCTION *body) +{ + /* + * [ Op_push_loop | z| y] <-- continue | break + * ------------------------ + * init (may be NULL) + * ------------------------ + * x: + * cond (Op_no_op if NULL) + * ------------------------ + * [ Op_jmp_false y ] + * ------------------------ + * body (may be NULL) + * ------------------------ + * z: + * incr (may be NULL) + * [ Op_jmp x ] + * ------------------------ + * y:[ Op_pop_loop ] + */ + + INSTRUCTION *ip; + INSTRUCTION *cp; + INSTRUCTION *jmp; + INSTRUCTION *pp_cond; + INSTRUCTION *ret; + + cp = instruction(Op_pop_loop); + + forp->opcode = Op_push_loop; + forp->target_break = cp; + ip = list_create(forp); + + if (init != NULL) + (void) list_merge(ip, init); + + if (cond != NULL) { + add_lint(cond, LINT_assign_in_cond); + pp_cond = cond->nexti; + (void) list_merge(ip, cond); + (void) list_append(ip, instruction(Op_jmp_false)); + ip->lasti->target_jmp = cp; + } else { + pp_cond = instruction(Op_no_op); + (void) list_append(ip, pp_cond); + } + + if (do_profiling) { + (void) list_append(ip, instruction(Op_exec_count)); + (forp + 1)->opcode = Op_K_for; + (forp + 1)->forloop_cond = pp_cond; + (forp + 1)->forloop_body = ip->lasti; + } + + if (body != NULL) + (void) list_merge(ip, body); + + if (incr != NULL) { + forp->target_continue = incr->nexti; + (void) list_merge(ip, incr); + } + jmp = instruction(Op_jmp); + jmp->target_jmp = pp_cond; + if (incr == NULL) + forp->target_continue = jmp; + (void) list_append(ip, jmp); + + ret = list_append(ip, cp); + + fix_break_continue(forp, cp, TRUE); + + return ret; +} + +/* add_lint --- add lint warning bytecode if needed */ + +static void +add_lint(INSTRUCTION *list, LINTTYPE linttype) +{ +#ifndef NO_LINT + INSTRUCTION *ip; + + switch (linttype) { + case LINT_assign_in_cond: + ip = list->lasti; + if (ip->opcode == Op_var_assign || ip->opcode == Op_field_assign) { + assert(ip != list->nexti); + for (ip = list->nexti; ip->nexti != list->lasti; ip = ip->nexti) + ; + } + + if (ip->opcode == Op_assign || ip->opcode == Op_assign_concat) { + list_append(list, instruction(Op_lint)); + list->lasti->lint_type = linttype; + } + break; + + case LINT_no_effect: + if (list->lasti->opcode == Op_pop && list->nexti != list->lasti) { + for (ip = list->nexti; ip->nexti != list->lasti; ip = ip->nexti) + ; + + if (do_lint) { /* compile-time warning */ + if (isnoeffect(ip->opcode)) + lintwarn(_("statement may have no effect")); + } + + if (ip->opcode == Op_push) { /* run-time warning */ + list_append(list, instruction(Op_lint)); + list->lasti->lint_type = linttype; + } + } + break; + + default: + break; + } +#endif +} + +/* mk_expression_list --- list of bytecode lists */ + +static INSTRUCTION * +mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1) +{ + INSTRUCTION *r; + + /* we can't just combine all bytecodes, since we need to + * process individual expressions for a few builtins in snode() (-: + */ + + /* -- list of lists */ + /* [Op_list| ... ]------ + * | + * [Op_list| ... ] -- | + * ... | | + * ... <------- | + * [Op_list| ... ] -- | + * ... | | + * ... | | + * ... <------- -- + */ + + assert(s1 != NULL && s1->opcode == Op_list); + if (list == NULL) { + list = instruction(Op_list); + list->nexti = s1; + list->lasti = s1->lasti; + return list; + } + + /* append expression to the end of the list */ + + r = list->lasti; + r->nexti = s1; + list->lasti = s1->lasti; + return list; +} + +/* count_expressions --- fixup expression_list from mk_expression_list. + * returns no of expressions in list. isarg is true + * for function arguments. + */ + +static int +count_expressions(INSTRUCTION **list, int isarg) +{ + INSTRUCTION *expr; + INSTRUCTION *r = NULL; + int count = 0; + + if (*list == NULL) /* error earlier */ + return 0; + + for (expr = (*list)->nexti; expr; ) { + INSTRUCTION *t1, *t2; + t1 = expr->nexti; + t2 = expr->lasti; + if (isarg && t1 == t2 && t1->opcode == Op_push) + t1->opcode = Op_push_param; + if (++count == 1) + r = expr; + else + (void) list_merge(r, expr); + expr = t2->nexti; + } + + assert(count > 0); + if (! isarg && count > max_args) + max_args = count; + bcfree(*list); + *list = r; + return count; +} + +/* fix_break_continue --- fix up break & continue nodes in loop bodies */ + +static void +fix_break_continue(INSTRUCTION *start, INSTRUCTION *end, int check_continue) +{ + INSTRUCTION *ip, *b_target, *c_target; + + assert(start->opcode == Op_push_loop); + assert(end->opcode == Op_pop_loop); + + b_target = start->target_break; + c_target = start->target_continue; + + for (ip = start; ip != end; ip = ip->nexti) { + switch (ip->opcode) { + case Op_K_break: + if (ip->target_jmp == NULL) + ip->target_jmp = b_target; + break; + + case Op_K_continue: + if (check_continue && ip->target_jmp == NULL) + ip->target_jmp = c_target; + break; + + default: + /* this is to keep the compiler happy. sheesh. */ + break; + } + } +} + + +/* append_symbol --- append symbol to the list of symbols + * installed in the symbol table. + */ + +void +append_symbol(char *name) +{ + NODE *hp; + + /* N.B.: func_install removes func name and reinstalls it; + * and we get two entries for it here!. destroy_symbol() + * will find and destroy the Node_func which is what we want. + */ + + getnode(hp); + hp->hname = name; /* shallow copy */ + hp->hnext = symbol_list->hnext; + symbol_list->hnext = hp; +} + +void +release_symbols(NODE *symlist, int keep_globals) +{ + NODE *hp, *n; + + for (hp = symlist->hnext; hp != NULL; hp = n) { + if (! keep_globals) { + /* destroys globals, function, and params + * if still in symbol table and not removed by func_install + * due to parse error. + */ + destroy_symbol(hp->hname); + } + n = hp->hnext; + freenode(hp); + } + symlist->hnext = NULL; +} + +/* destroy_symbol --- remove a symbol from symbol table +* and free all associated memory. +*/ + + +void +destroy_symbol(char *name) +{ + NODE *symbol, *hp; + + symbol = lookup(name); + if (symbol == NULL) + return; + + if (symbol->type == Node_func) { + char **varnames; + NODE *func, *n; + + func = symbol; + varnames = func->parmlist; + if (varnames != NULL) + efree(varnames); + + /* function parameters of type Node_param_list */ + for (n = func->lnode->rnode; n != NULL; ) { + NODE *np; + np = n->rnode; + efree(n->param); + freenode(n); + n = np; + } + freenode(func->lnode); + func_count--; + + } else if (symbol->type == Node_var_array) + assoc_clear(symbol); + else if (symbol->type == Node_var) + unref(symbol->var_value); + + /* remove from symbol table */ + hp = remove_symbol(name); + efree(hp->hname); + freenode(hp->hvalue); + freenode(hp); +} + +#define pool_size d.dl +#define freei x.xi +static INSTRUCTION *pool_list; +static CONTEXT *curr_ctxt = NULL; + + +CONTEXT * +new_context() +{ + CONTEXT *ctxt; + + emalloc(ctxt, CONTEXT *, sizeof(CONTEXT), "new_context"); + memset(ctxt, 0, sizeof(CONTEXT)); + ctxt->srcfiles.next = ctxt->srcfiles.prev = &ctxt->srcfiles; + ctxt->rule_list.opcode = Op_list; + ctxt->rule_list.lasti = &ctxt->rule_list; + if (curr_ctxt == NULL) { + ctxt->level = 0; + pool_list = &ctxt->pools; + symbol_list = &ctxt->symbols; + srcfiles = &ctxt->srcfiles; + rule_list = &ctxt->rule_list; + install_func = ctxt->install_func; + curr_ctxt = ctxt; + } else + ctxt->level = curr_ctxt->level + 1; /* this assumes contexts don't overlap each other ? */ + + return ctxt; +} + + +/* N.B.: new context (level > 0) inherits all command line options; + * probably should restore defaults for lint etc. + */ + +CONTEXT * +set_context(CONTEXT *ctxt) +{ + assert(curr_ctxt != NULL); + if (curr_ctxt == ctxt) + goto update; + + /* save current source and sourceline */ + curr_ctxt->sourceline = sourceline; + curr_ctxt->source = source; + + if (ctxt == NULL) { + assert(curr_ctxt->prev != NULL); + ctxt = curr_ctxt->prev; + } else + ctxt->prev = curr_ctxt; + + /* restore source and sourceline */ + sourceline = ctxt->sourceline; + source = ctxt->source; + +update: + pool_list = &ctxt->pools; + symbol_list = &ctxt->symbols; + srcfiles = &ctxt->srcfiles; + rule_list = &ctxt->rule_list; + install_func = ctxt->install_func; + curr_ctxt = ctxt; + return curr_ctxt; +} + +CONTEXT * +get_context() +{ + assert(curr_ctxt != NULL); + return curr_ctxt; +} + +void +free_context(CONTEXT *ctxt, int keep_globals) +{ + SRCFILE *s, *sn; + + if (ctxt == NULL) + return; + + assert(curr_ctxt != ctxt); + + /* free all code including function codes */ + free_bcpool(&ctxt->pools); + /* free symbols */ + release_symbols(&ctxt->symbols, keep_globals); + /* free srcfiles */ + for (s = &ctxt->srcfiles; s != &ctxt->srcfiles; s = sn) { + sn = s->next; + if (s->stype != SRC_CMDLINE && s->stype != SRC_STDIN) + efree(s->fullpath); + efree(s->src); + efree(s); + } + efree(ctxt); +} + +static void +free_bc_internal(INSTRUCTION *cp) +{ + INSTRUCTION *curr; + NODE *m; + + switch(cp->opcode) { + case Op_func_call: + if (cp->func_name != NULL + && cp->func_name != builtin_func + ) + efree(cp->func_name); + break; + case Op_K_switch: + for (curr = cp->case_val; curr != NULL; curr = curr->nexti) { + if (curr->opcode == Op_K_case && + curr->memory->type != Node_val + ) { + m = curr->memory; + if (m->re_text != NULL) + unref(m->re_text); + if (m->re_reg != NULL) + refree(m->re_reg); + if (m->re_exp != NULL) + unref(m->re_exp); + freenode(m); + } + } + break; + case Op_push_re: + case Op_match_rec: + case Op_match: + case Op_nomatch: + m = cp->memory; + if (m->re_reg != NULL) + refree(m->re_reg); + if (m->re_exp != NULL) + unref(m->re_exp); + if (m->re_text != NULL) + unref(m->re_text); + freenode(m); + break; + case Op_token: /* token lost during error recovery in yyparse */ + if (cp->lextok != NULL) + efree(cp->lextok); + break; + case Op_illegal: + cant_happen(); + default: + break; + } +} + + +/* INSTR_CHUNK must be > largest code size (3) */ +#define INSTR_CHUNK 127 + +/* bcfree --- deallocate instruction */ + +void +bcfree(INSTRUCTION *cp) +{ + cp->opcode = 0; + cp->nexti = pool_list->freei; + pool_list->freei = cp; +} + + +/* bcalloc --- allocate a new instruction */ + +INSTRUCTION * +bcalloc(OPCODE op, int size, int srcline) +{ + INSTRUCTION *cp; + + if (size > 1) { + /* wide instructions Op_rule, Op_func_call .. */ + emalloc(cp, INSTRUCTION *, (size + 1) * sizeof(INSTRUCTION), "bcalloc"); + cp->pool_size = size; + cp->nexti = pool_list->nexti; + pool_list->nexti = cp++; + } else { + INSTRUCTION *pool; + + pool = pool_list->freei; + if (pool == NULL) { + INSTRUCTION *last; + emalloc(cp, INSTRUCTION *, (INSTR_CHUNK + 1) * sizeof(INSTRUCTION), "bcalloc"); + + cp->pool_size = INSTR_CHUNK; + cp->nexti = pool_list->nexti; + pool_list->nexti = cp; + pool = ++cp; + last = &pool[INSTR_CHUNK - 1]; + for (; cp <= last; cp++) { + cp->opcode = 0; + cp->nexti = cp + 1; + } + --cp; + cp->nexti = NULL; + } + cp = pool; + pool_list->freei = cp->nexti; + } + + memset(cp, 0, size * sizeof(INSTRUCTION)); + cp->opcode = op; + cp->source_line = srcline; + return cp; +} + + +static void +free_bcpool(INSTRUCTION *pl) +{ + INSTRUCTION *pool, *tmp; + + for (pool = pl->nexti; pool != NULL; pool = tmp) { + INSTRUCTION *cp, *last; + long psiz; + psiz = pool->pool_size; + if (psiz == INSTR_CHUNK) + last = pool + psiz; + else + last = pool + 1; + for (cp = pool + 1; cp <= last ; cp++) { + if (cp->opcode != 0) + free_bc_internal(cp); + } + tmp = pool->nexti; + efree(pool); + } + memset(pl, 0, sizeof(INSTRUCTION)); +} + + +static inline INSTRUCTION * +list_create(INSTRUCTION *x) +{ + INSTRUCTION *l; + + l = instruction(Op_list); + l->nexti = x; + l->lasti = x; + return l; +} + +static inline INSTRUCTION * +list_append(INSTRUCTION *l, INSTRUCTION *x) +{ +#ifdef GAWKDEBUG + if (l->opcode != Op_list) + cant_happen(); +#endif + l->lasti->nexti = x; + l->lasti = x; + return l; +} + +static inline INSTRUCTION * +list_prepend(INSTRUCTION *l, INSTRUCTION *x) +{ +#ifdef GAWKDEBUG + if (l->opcode != Op_list) + cant_happen(); +#endif + x->nexti = l->nexti; + l->nexti = x; + return l; +} + +static inline INSTRUCTION * +list_merge(INSTRUCTION *l1, INSTRUCTION *l2) +{ +#ifdef GAWKDEBUG + if (l1->opcode != Op_list) + cant_happen(); + if (l2->opcode != Op_list) + cant_happen(); +#endif + l1->lasti->nexti = l2->nexti; + l1->lasti = l2->lasti; + bcfree(l2); + return l1; +} + /* See if name is a special token. */ int @@ -3589,7 +5849,6 @@ check_special(const char *name) } #endif - low = 0; high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1; while (low <= high) { @@ -3628,7 +5887,7 @@ read_one_line(int fd, void *buffer, size_t count) fp = fdopen(fd, "r"); if (fp == NULL) { fprintf(stderr, "ugh. fdopen: %s\n", strerror(errno)); - exit(EXIT_FAILURE); + gawk_exit(EXIT_FAILURE); } } @@ -3654,161 +5913,3 @@ one_line_close(int fd) return ret; } -/* constant_fold --- try to fold constant operations */ - -static NODE * -constant_fold(NODE *left, NODETYPE op, NODE *right) -{ - AWKNUM result; - extern double fmod P((double x, double y)); - - if (! do_optimize) - return node(left, op, right); - - /* Unary not */ - if (right == NULL) { - if (op == Node_not && left->type == Node_val) { - if ((left->flags & (STRCUR|STRING)) != 0) { - NODE *ret; - if (left->stlen == 0) { - ret = make_number((AWKNUM) 1.0); - } else { - ret = make_number((AWKNUM) 0.0); - } - unref(left); - - return ret; - } else { - if (left->numbr == 0) { - left->numbr = 1.0; - } else { - left->numbr = 0.0; - } - - return left; - } - } - - return node(left, op, right); - } - - /* String concatentation of two string constants */ - if (op == Node_concat - && left->type == Node_val - && (left->flags & (STRCUR|STRING)) != 0 - && right->type == Node_val - && (right->flags & (STRCUR|STRING)) != 0) { - size_t newlen = left->stlen + right->stlen + 2; - - erealloc(left->stptr, char *, newlen, "constant_fold"); - memcpy(left->stptr + left->stlen, right->stptr, right->stlen); - left->stptr[left->stlen + right->stlen] = '\0'; - left->stlen += right->stlen; - - unref(right); - return left; - } - - /* - * From here down, numeric operations. - * Check for string and bail out if have them. - */ - if (left->type != Node_val - || (left->flags & (STRCUR|STRING)) != 0 - || right->type != Node_val - || (right->flags & (STRCUR|STRING)) != 0) { - return node(left, op, right); - } - - /* Numeric operations: */ - switch (op) { - case Node_not: - case Node_exp: - case Node_times: - case Node_quotient: - case Node_mod: - case Node_plus: - case Node_minus: - break; - default: - return node(left, op, right); - } - - left->numbr = force_number(left); - right->numbr = force_number(right); - - result = left->numbr; - switch (op) { - case Node_exp: - result = calc_exp(left->numbr, right->numbr); - break; - case Node_times: - result *= right->numbr; - break; - case Node_quotient: - if (right->numbr == 0) - fatal(_("division by zero attempted in `/'")); - result /= right->numbr; - break; - case Node_mod: - if (right->numbr == 0) - fatal(_("division by zero attempted in `%%'")); -#ifdef HAVE_FMOD - result = fmod(result, right->numbr); -#else /* ! HAVE_FMOD */ - (void) modf(left->numbr / right->numbr, &result); - result = left->numbr - result * right->numbr; -#endif /* ! HAVE_FMOD */ - break; - case Node_plus: - result += right->numbr; - break; - case Node_minus: - result -= right->numbr; - break; - default: - /* Shut up compiler warnings */ - fatal("can't happen"); - break; - } - - unref(right); - left->numbr = result; - - return left; -} - -/* optimize_concat --- optimize the general "x = x y z a" case */ - -static NODE * -optimize_concat(NODE *left, NODETYPE op, NODE *right) -{ - NODE *top, *leftmost; - - if (op != Node_assign) - return node(left, op, right); - - /* - * optimization of `x = x y'. can save lots of time - * if done a lot. - */ - if (( left->type == Node_var - || left->type == Node_var_new - || left->type == Node_param_list) - && right->type == Node_concat) { - /* find bottom of tree, save it */ - for (top = right; top->lnode != NULL && top->type == Node_concat; top = top->lnode) { - leftmost = top->lnode; - if (leftmost->type == Node_concat) - continue; - - /* at this point, we've run out of concatentation */ - if (leftmost != left) - return node(left, op, right); - - top->lnode = Nnull_string; - return node(left, Node_assign_concat, right); - } - } - return node(left, op, right); -} |