summaryrefslogtreecommitdiff
path: root/field.c
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2010-07-16 12:45:40 +0300
committerArnold D. Robbins <arnold@skeeve.com>2010-07-16 12:45:40 +0300
commit558ba97bdeac5a68bb9248a5c4cdf2feeb24e771 (patch)
tree5c03c98edb9c5488103a6ffdef047e590e0b35b9 /field.c
parent8c042f99cc7465c86351d21331a129111b75345d (diff)
downloadgawk-558ba97bdeac5a68bb9248a5c4cdf2feeb24e771.tar.gz
Move to gawk-3.0.1.gawk-3.0.1
Diffstat (limited to 'field.c')
-rw-r--r--field.c248
1 files changed, 186 insertions, 62 deletions
diff --git a/field.c b/field.c
index 4b638b2c..c5b4e10f 100644
--- a/field.c
+++ b/field.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989, 1991-1995 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991-1996 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@@ -24,6 +24,7 @@
*/
#include "awk.h"
+#include <assert.h>
typedef void (* Setfunc) P((long, char *, long, NODE *));
@@ -34,6 +35,8 @@ static long re_parse_field P((long, char **, int, NODE *,
Regexp *, Setfunc, NODE *));
static long def_parse_field P((long, char **, int, NODE *,
Regexp *, Setfunc, NODE *));
+static long posix_def_parse_field P((long, char **, int, NODE *,
+ Regexp *, Setfunc, NODE *));
static long null_parse_field P((long, char **, int, NODE *,
Regexp *, Setfunc, NODE *));
static long sc_parse_field P((long, char **, int, NODE *,
@@ -52,12 +55,11 @@ static int resave_fs;
static NODE *save_FS; /* save current value of FS when line is read,
* to be used in deferred parsing
*/
-static NODE **nodes; /* permanent repository of field nodes */
static int *FIELDWIDTHS = NULL;
NODE **fields_arr; /* array of pointers to the field nodes */
int field0_valid; /* $(>0) has not been changed yet */
-int default_FS; /* 1 when FS == " " */
+int default_FS; /* TRUE when FS == " " */
Regexp *FS_regexp = NULL;
/* init_fields --- set up the fields array to start with */
@@ -68,11 +70,10 @@ init_fields()
NODE *n;
emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields");
- emalloc(nodes, NODE **, sizeof(NODE *), "init_fields");
getnode(n);
*n = *Nnull_string;
- n->flags |= SCALAR;
- fields_arr[0] = nodes[0] = n;
+ n->flags |= (SCALAR|PERM);
+ fields_arr[0] = n;
parse_extent = fields_arr[0]->stptr;
save_FS = dupnode(FS_node->var_value);
field0_valid = TRUE;
@@ -88,12 +89,11 @@ long num;
register NODE *n;
erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "grow_fields_arr");
- erealloc(nodes, NODE **, (num+1) * sizeof(NODE *), "grow_fields_arr");
for (t = nf_high_water + 1; t <= num; t++) {
getnode(n);
*n = *Nnull_string;
n->flags |= SCALAR;
- fields_arr[t] = nodes[t] = n;
+ fields_arr[t] = n;
}
nf_high_water = num;
}
@@ -112,11 +112,10 @@ NODE *dummy; /* not used -- just to make interface same as set_element */
if (num > nf_high_water)
grow_fields_arr(num);
- n = nodes[num];
+ n = fields_arr[num];
n->stptr = str;
n->stlen = len;
- n->flags = (PERM|STR|STRING|MAYBE_NUM);
- fields_arr[num] = n;
+ n->flags = (PERM|STR|STRING|MAYBE_NUM|SCALAR);
}
/* rebuild_record --- Someone assigned a value to $(something).
@@ -135,13 +134,17 @@ rebuild_record()
NODE *ofs;
char *ops;
register char *cops;
- register NODE **ptr;
+ long i;
+ char *f0start, *f0end;
+
+ assert(NF != -1);
tlen = 0;
ofs = force_string(OFS_node->var_value);
ofslen = ofs->stlen;
- for (ptr = &fields_arr[NF]; ptr > &fields_arr[0]; ptr--) {
- tmp = force_string(*ptr);
+ for (i = NF; i > 0; i--) {
+ tmp = fields_arr[i];
+ tmp = force_string(tmp);
tlen += tmp->stlen;
}
tlen += (NF - 1) * ofslen;
@@ -150,15 +153,17 @@ rebuild_record()
emalloc(ops, char *, tlen + 2, "rebuild_record");
cops = ops;
ops[0] = '\0';
- for (ptr = &fields_arr[1]; ptr <= &fields_arr[NF]; ptr++) {
- tmp = *ptr;
+ for (i = 1; i <= NF; i++) {
+ tmp = fields_arr[i];
+ /* copy field */
if (tmp->stlen == 1)
*cops++ = tmp->stptr[0];
else if (tmp->stlen != 0) {
memcpy(cops, tmp->stptr, tmp->stlen);
cops += tmp->stlen;
}
- if (ptr != &fields_arr[NF]) {
+ /* copy OFS */
+ if (i != NF) {
if (ofslen == 1)
*cops++ = ofs->stptr[0];
else if (ofslen != 0) {
@@ -168,8 +173,29 @@ rebuild_record()
}
}
tmp = make_str_node(ops, tlen, ALREADY_MALLOCED);
+
+ /*
+ * Since we are about to unref fields_arr[0], we want to find
+ * any fields that still point into it, and have them point
+ * into the new field zero.
+ */
+ f0start = fields_arr[0]->stptr;
+ f0end = fields_arr[0]->stptr + fields_arr[0]->stlen;
+ for (cops = ops, i = 1; i <= NF; i++) {
+ char *field_data = fields_arr[i]->stptr;
+
+ if (fields_arr[i]->stlen > 0
+ && f0start <= field_data && field_data < f0end)
+ fields_arr[i]->stptr = cops;
+
+ cops += fields_arr[i]->stlen + ofslen;
+ }
+
+ fields_arr[0]->flags &= ~PERM;
unref(fields_arr[0]);
+
fields_arr[0] = tmp;
+ fields_arr[0]->flags |= PERM;
field0_valid = TRUE;
}
@@ -180,20 +206,26 @@ rebuild_record()
*/
void
set_record(buf, cnt, freeold)
-char *buf;
-int cnt;
+char *buf; /* ignored if ! freeold */
+int cnt; /* ignored if ! freeold */
int freeold;
{
register int i;
+ NODE *n;
NF = -1;
- for (i = 1; i <= parse_high_water; i++)
+ for (i = 1; i <= parse_high_water; i++) {
unref(fields_arr[i]);
+ getnode(n);
+ *n = *Nnull_string;
+ n->flags |= SCALAR;
+ fields_arr[i] = n;
+ }
parse_high_water = 0;
/*
* $0 = $0 should resplit using the current value of FS, thus,
- * this if is executed orthogonally to the value of freeold.
+ * this is executed orthogonally to the value of freeold.
*/
if (resave_fs) {
resave_fs = FALSE;
@@ -202,13 +234,16 @@ int freeold;
}
if (freeold) {
unref(fields_arr[0]);
- nodes[0]->stptr = buf;
- nodes[0]->stlen = cnt;
- nodes[0]->stref = 1;
- nodes[0]->flags = (STRING|STR|PERM|MAYBE_NUM);
- fields_arr[0] = nodes[0];
+ getnode(n);
+ n->stptr = buf;
+ n->stlen = cnt;
+ n->stref = 1;
+ n->type = Node_val;
+ n->stfmt = -1;
+ n->flags = (STRING|STR|PERM|MAYBE_NUM|SCALAR);
+ fields_arr[0] = n;
}
- fields_arr[0]->flags |= MAYBE_NUM;
+ fields_arr[0]->flags |= (MAYBE_NUM|PERM);
field0_valid = TRUE;
}
@@ -227,13 +262,30 @@ void
set_NF()
{
register int i;
+ NODE *n;
+
+ assert(NF != -1);
NF = (long) force_number(NF_node->var_value);
if (NF > nf_high_water)
grow_fields_arr(NF);
- for (i = parse_high_water + 1; i <= NF; i++) {
- unref(fields_arr[i]);
- fields_arr[i] = Nnull_string;
+ if (parse_high_water < NF) {
+ for (i = parse_high_water + 1; i <= NF; i++) {
+ unref(fields_arr[i]);
+ getnode(n);
+ *n = *Nnull_string;
+ n->flags |= SCALAR;
+ fields_arr[i] = n;
+ }
+ } else if (parse_high_water > 0) {
+ for (i = NF + 1; i <= parse_high_water; i++) {
+ unref(fields_arr[i]);
+ getnode(n);
+ *n = *Nnull_string;
+ n->flags |= SCALAR;
+ fields_arr[i] = n;
+ }
+ parse_high_water = NF;
}
field0_valid = FALSE;
}
@@ -303,6 +355,7 @@ NODE *n;
* via (*parse_field)(). This variation is for when FS is a single space
* character.
*/
+
static long
def_parse_field(up_to, buf, len, fs, rp, set, n)
long up_to; /* parse only up to this field number */
@@ -343,6 +396,74 @@ NODE *n;
/*
* special case: fs is single space, strip leading whitespace
*/
+ while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n'))
+ scan++;
+ if (scan >= end)
+ break;
+ field = scan;
+ while (*scan != ' ' && *scan != '\t' && *scan != '\n')
+ scan++;
+ (*set)(++nf, field, (long)(scan - field), n);
+ if (scan == end)
+ break;
+ }
+
+ /* everything done, restore original char at *end */
+ *end = sav;
+
+ *buf = scan;
+ return nf;
+}
+
+/*
+ * posix_def_parse_field --- default field parsing.
+ *
+ * This is called both from get_field() and from do_split()
+ * via (*parse_field)(). This variation is for when FS is a single space
+ * character. The only difference between this and def_parse_field()
+ * is that this one does not allow newlines to separate fields.
+ */
+
+static long
+posix_def_parse_field(up_to, buf, len, fs, rp, set, n)
+long up_to; /* parse only up to this field number */
+char **buf; /* on input: string to parse; on output: point to start next */
+int len;
+NODE *fs;
+Regexp *rp;
+Setfunc set; /* routine to set the value of the parsed field */
+NODE *n;
+{
+ register char *scan = *buf;
+ register long nf = parse_high_water;
+ register char *field;
+ register char *end = scan + len;
+ char sav;
+
+ if (up_to == HUGE)
+ nf = 0;
+ if (len == 0)
+ return nf;
+
+ /*
+ * Nasty special case. If FS set to "", return whole record
+ * as first field. This is not worth a separate function.
+ */
+ if (fs->stlen == 0) {
+ (*set)(++nf, *buf, len, n);
+ *buf += len;
+ return nf;
+ }
+
+ /* before doing anything save the char at *end */
+ sav = *end;
+ /* because it will be destroyed now: */
+
+ *end = ' '; /* sentinel character */
+ for (; nf < up_to; scan++) {
+ /*
+ * special case: fs is single space, strip leading whitespace
+ */
while (scan < end && (*scan == ' ' || *scan == '\t'))
scan++;
if (scan >= end)
@@ -431,7 +552,7 @@ NODE *n;
onecase = (IGNORECASE && isalpha(fschar));
if (onecase)
- fschar = casetable[fschar];
+ fschar = casetable[(int) fschar];
/* before doing anything save the char at *end */
sav = *end;
@@ -441,7 +562,7 @@ NODE *n;
for (; nf < up_to;) {
field = scan;
if (onecase) {
- while (casetable[*scan] != fschar)
+ while (casetable[(int) *scan] != fschar)
scan++;
} else {
while (*scan != fschar)
@@ -524,7 +645,6 @@ Func_ptr *assign; /* this field is on the LHS of an assign */
parse_high_water = NF;
}
rebuild_record();
- reset_record(); /* clear out fields array */
}
if (assign != NULL)
*assign = reset_record;
@@ -565,15 +685,9 @@ Func_ptr *assign; /* this field is on the LHS of an assign */
}
if (parse_high_water < requested) { /* requested beyond end of record */
if (assign != NULL) { /* expand record */
- register int i;
-
if (requested > nf_high_water)
grow_fields_arr(requested);
- /* fill in fields that don't exist */
- for (i = parse_high_water + 1; i <= requested; i++)
- fields_arr[i] = Nnull_string;
-
NF = requested;
parse_high_water = requested;
} else
@@ -605,14 +719,13 @@ NODE *
do_split(tree)
NODE *tree;
{
- NODE *t1, *t2, *t3, *tmp;
+ NODE *src, *arr, *sep, *tmp;
NODE *fs;
char *s;
long (*parseit) P((long, char **, int, NODE *,
Regexp *, Setfunc, NODE *));
Regexp *rp = NULL;
-
/*
* do dupnode(), to avoid problems like
* x = split(a[1], a, "blah")
@@ -620,45 +733,52 @@ NODE *tree;
* this also gives us complete call by value semantics.
*/
tmp = tree_eval(tree->lnode);
- t1 = dupnode(tmp);
+ src = dupnode(tmp);
free_temp(tmp);
- t2 = tree->rnode->lnode;
- t3 = tree->rnode->rnode->lnode;
+ arr = tree->rnode->lnode;
+ if (tree->rnode->rnode != NULL)
+ sep = tree->rnode->rnode->lnode; /* 3rd arg */
+ else
+ sep = NULL;
- (void) force_string(t1);
+ (void) force_string(src);
- if (t2->type == Node_param_list)
- t2 = stack_ptr[t2->param_cnt];
- if (t2->type != Node_var && t2->type != Node_var_array)
+ if (arr->type == Node_param_list)
+ arr = stack_ptr[arr->param_cnt];
+ if (arr->type != Node_var && arr->type != Node_var_array)
fatal("second argument of split is not an array");
- assoc_clear(t2);
+ arr->type = Node_var_array;
+ assoc_clear(arr);
- if (t3->re_flags & FS_DFLT) {
+ if (sep->re_flags & FS_DFLT) {
parseit = parse_field;
fs = force_string(FS_node->var_value);
rp = FS_regexp;
} else {
- tmp = force_string(tree_eval(t3->re_exp));
+ tmp = force_string(tree_eval(sep->re_exp));
if (tmp->stlen == 0)
parseit = null_parse_field;
- else if (tmp->stlen == 1) {
- if (tmp->stptr[0] == ' ')
- parseit = def_parse_field;
- else
+ else if (tmp->stlen == 1 && (sep->re_flags & CONST) == 0) {
+ if (tmp->stptr[0] == ' ') {
+ if (do_posix)
+ parseit = posix_def_parse_field;
+ else
+ parseit = def_parse_field;
+ } else
parseit = sc_parse_field;
} else {
parseit = re_parse_field;
- rp = re_update(t3);
+ rp = re_update(sep);
}
fs = tmp;
}
- s = t1->stptr;
- tmp = tmp_number((AWKNUM) (*parseit)(HUGE, &s, (int) t1->stlen,
- fs, rp, set_element, t2));
- unref(t1);
- free_temp(t3);
+ s = src->stptr;
+ tmp = tmp_number((AWKNUM) (*parseit)(HUGE, &s, (int) src->stlen,
+ fs, rp, set_element, arr));
+ unref(src);
+ free_temp(sep);
return tmp;
}
@@ -760,7 +880,10 @@ set_FS()
sprintf(buf, "[%c\n]", fs->stptr[0]);
}
} else {
- parse_field = def_parse_field;
+ if (do_posix)
+ parse_field = posix_def_parse_field;
+ else
+ parse_field = def_parse_field;
if (fs->stptr[0] == ' ' && fs->stlen == 1)
default_FS = TRUE;
else if (fs->stptr[0] != ' ' && fs->stlen == 1) {
@@ -789,3 +912,4 @@ using_fieldwidths()
{
return parse_field == fw_parse_field;
}
+