summaryrefslogtreecommitdiff
path: root/array.c
diff options
context:
space:
mode:
authorjohn haque <j.eh@mchsi.com>2011-08-20 08:28:02 -0500
committerjohn haque <j.eh@mchsi.com>2011-10-12 07:33:05 -0500
commit1fea520248b42ca995c8797698c60301ea42ffe9 (patch)
tree1aa80c13392c25aa6bf3eb931fec9c83a0621e25 /array.c
parentf757e147f1ae8254669b3222bc24a39ee8ff9b8f (diff)
downloadgawk-1fea520248b42ca995c8797698c60301ea42ffe9.tar.gz
Speed/memory performance improvements.
Diffstat (limited to 'array.c')
-rw-r--r--array.c1379
1 files changed, 489 insertions, 890 deletions
diff --git a/array.c b/array.c
index 82e99a4b..91b87572 100644
--- a/array.c
+++ b/array.c
@@ -1,5 +1,5 @@
/*
- * array.c - routines for associative arrays.
+ * array.c - routines for awk arrays.
*/
/*
@@ -25,69 +25,190 @@
#include "awk.h"
-/*
- * Tree walks (``for (iggy in foo)'') and array deletions use expensive
- * linear searching. So what we do is start out with small arrays and
- * grow them as needed, so that our arrays are hopefully small enough,
- * most of the time, that they're pretty full and we're not looking at
- * wasted space.
- *
- * The decision is made to grow the array if the average chain length is
- * ``too big''. This is defined as the total number of entries in the table
- * divided by the size of the array being greater than some constant.
- *
- * We make the constant a variable, so that it can be tweaked
- * via environment variable.
- */
-
-static size_t AVG_CHAIN_MAX = 2; /* Modern machines are bigger, reduce this from 10. */
+extern FILE *output_fp;
+extern NODE **fmt_list; /* declared in eval.c */
+extern array_ptr str_array_func[];
+extern array_ptr cint_array_func[];
+extern array_ptr int_array_func[];
static size_t SUBSEPlen;
static char *SUBSEP;
+static char indent_char[] = " ";
-static NODE *assoc_find(NODE *symbol, NODE *subs, unsigned long hash1, NODE **last);
-static void grow_table(NODE *symbol);
+static NODE **e_lookup(NODE *symbol, NODE *subs);
+static array_ptr empty_array_func[NUM_AFUNCS] = {
+ (array_ptr) 0,
+ (array_ptr) 0,
+ e_lookup,
+};
-static unsigned long gst_hash_string(const char *str, size_t len, unsigned long hsize, size_t *code);
-static unsigned long scramble(unsigned long x);
-static unsigned long awk_hash(const char *s, size_t len, unsigned long hsize, size_t *code);
+#define MAX_ATYPE 10
-unsigned long (*hash)(const char *s, size_t len, unsigned long hsize, size_t *code) = awk_hash;
+static array_ptr *atypes[MAX_ATYPE];
+static int num_atypes = 0;
-/* qsort comparison function */
-static int sort_up_index_string(const void *, const void *);
-static int sort_down_index_string(const void *, const void *);
-static int sort_up_index_number(const void *, const void *);
-static int sort_down_index_number(const void *, const void *);
-static int sort_up_value_string(const void *, const void *);
-static int sort_down_value_string(const void *, const void *);
-static int sort_up_value_number(const void *, const void *);
-static int sort_down_value_number(const void *, const void *);
-static int sort_up_value_type(const void *, const void *);
-static int sort_down_value_type(const void *, const void *);
+/*
+ * index 0 : initialization.
+ * index 1 : check if index is compatible.
+ * index 8 : array dump, memory and other statistics (do_adump).
+ * Also used by debugger 'examine' command.
+ */
+
+
+int
+register_array_func(array_ptr *afunc)
+{
+ if (afunc && num_atypes < MAX_ATYPE) {
+ if (afunc != str_array_func && ! afunc[1])
+ return FALSE;
+ atypes[num_atypes++] = afunc;
+ if (afunc[0]) /* execute init routine if any */
+ (void) (*afunc[0])(NULL, NULL);
+ return TRUE;
+ }
+ return FALSE;
+}
-/* array_init --- check relevant environment variables */
+/* array_init --- register all builtin array types */
void
array_init()
{
- const char *val;
- char *endptr;
- size_t newval;
-
- if ((val = getenv("AVG_CHAIN_MAX")) != NULL && isdigit((unsigned char) *val)) {
- newval = strtoul(val, & endptr, 10);
- if (endptr != val && newval > 0)
- AVG_CHAIN_MAX = newval;
+ (void) register_array_func(str_array_func); /* the default */
+ (void) register_array_func(int_array_func);
+ (void) register_array_func(cint_array_func);
+}
+
+/* make_array --- create an array node */
+
+NODE *
+make_array()
+{
+ NODE *array;
+ getnode(array);
+ memset(array, '\0', sizeof(NODE));
+ array->type = Node_var_array;
+ array->array_funcs = empty_array_func;
+ /* vname, flags, and parent_array not set here */
+
+ return array;
+}
+
+
+/* init_array --- initialize an array node */
+
+void
+init_array(NODE *symbol)
+{
+ symbol->type = Node_var_array;
+ symbol->array_funcs = empty_array_func;
+ symbol->buckets = NULL;
+ symbol->table_size = symbol->array_size = 0;
+ symbol->array_capacity = 0;
+
+ assert(symbol->xarray == NULL);
+ /* symbol->xarray = NULL; */
+
+ /* flags, vname, parent_array not (re)initialized */
+}
+
+
+/* e_lookup: assign type to an empty array. */
+
+static NODE **
+e_lookup(NODE *symbol, NODE *subs)
+{
+ int i;
+ array_ptr *afunc = NULL;
+
+ assert(array_empty(symbol) == TRUE);
+
+ /* Check which array type wants to accept this sub; traverse
+ * array type list in reverse order.
+ */
+ for (i = num_atypes - 1; i >= 1; i--) {
+ afunc = atypes[i];
+ if (afunc[1](symbol, subs) != NULL)
+ break;
+ }
+ if (i == 0 || afunc == NULL)
+ afunc = atypes[0]; /* default is str_array_func */
+ symbol->array_funcs = afunc;
+
+ /* We have the right type of array; install the subscript */
+ return symbol->alookup(symbol, subs);
+}
+
+
+/* assoc_clear --- flush all the values in symbol[] */
+
+void
+assoc_clear(NODE *symbol)
+{
+ if (! array_empty(symbol))
+ (void) symbol->aclear(symbol, NULL);
+}
+
+
+/* r_in_array --- test whether the array element symbol[subs] exists or not,
+ * return pointer to value if it does.
+ */
+
+NODE *
+r_in_array(NODE *symbol, NODE *subs)
+{
+ NODE **ret;
+
+ if (array_empty(symbol))
+ return NULL;
+ ret = symbol->aexists(symbol, subs);
+ return (ret ? *ret : NULL);
+}
+
+
+/* assoc_remove --- remove an index from symbol[] */
+
+int
+assoc_remove(NODE *symbol, NODE *subs)
+{
+ if (array_empty(symbol))
+ return FALSE;
+ return (symbol->aremove(symbol, subs) != NULL);
+}
+
+
+/* assoc_copy --- duplicate input array "symbol" */
+
+NODE *
+assoc_copy(NODE *symbol, NODE *newsymb)
+{
+ assert(newsymb->vname != NULL);
+
+ assoc_clear(newsymb);
+ if (! array_empty(symbol)) {
+ (void) symbol->acopy(symbol, newsymb);
+ newsymb->array_funcs = symbol->array_funcs;
+ newsymb->flags = symbol->flags;
}
+ return newsymb;
+}
- if ((val = getenv("AWK_HASH")) != NULL && strcmp(val, "gst") == 0)
- hash = gst_hash_string;
+
+/* assoc_dump --- dump array */
+
+void
+assoc_dump(NODE *symbol, NODE *ndump)
+{
+ if (array_empty(symbol))
+ fprintf(output_fp, "array `%s' is empty\n", array_vname(symbol));
+ else if (symbol->adump)
+ (void) symbol->adump(symbol, ndump);
}
+
/* make_aname --- construct a 'vname' for a (sub)array */
-static char *
+const char *
make_aname(const NODE *symbol)
{
static char *aname = NULL;
@@ -115,10 +236,11 @@ make_aname(const NODE *symbol)
erealloc(aname, char *, (max_alen + 1) * sizeof(char *), "make_aname");
}
memcpy(aname, symbol->vname, alen + 1);
- }
+ }
return aname;
-#undef SLEN
}
+#undef SLEN
+
/*
* array_vname --- print the name of the array
@@ -128,7 +250,7 @@ make_aname(const NODE *symbol)
* to save it, they have to make a copy.
*/
-char *
+const char *
array_vname(const NODE *symbol)
{
static char *message = NULL;
@@ -164,7 +286,6 @@ array_vname(const NODE *symbol)
else
aname = make_aname(symbol);
len += strlen(aname);
-
/*
* Each node contributes by strlen(from) minus the length
* of "%s" in the translation (which is at least 2)
@@ -218,7 +339,7 @@ get_array(NODE *symbol, int canfatal)
NODE *save_symbol = symbol;
int isparam = FALSE;
- if (symbol->type == Node_param_list && (symbol->flags & FUNC) == 0) {
+ if (symbol->type == Node_param_list) {
save_symbol = symbol = GET_PARAM(symbol->param_cnt);
isparam = TRUE;
if (symbol->type == Node_array_ref)
@@ -227,35 +348,24 @@ get_array(NODE *symbol, int canfatal)
switch (symbol->type) {
case Node_var_new:
- symbol->type = Node_var_array;
- symbol->var_array = NULL;
+ init_array(symbol);
symbol->parent_array = NULL; /* main array has no parent */
/* fall through */
case Node_var_array:
break;
case Node_array_ref:
- case Node_param_list:
- if ((symbol->flags & FUNC) == 0)
- cant_happen();
- /* else
- fall through */
-
default:
- /* notably Node_var but catches also e.g. FS[1] = "x" */
+ /* notably Node_var but catches also e.g. a[1] = "x"; a[1][1] = "y" */
if (canfatal) {
if (symbol->type == Node_val)
fatal(_("attempt to use a scalar value as array"));
-
- if ((symbol->flags & FUNC) != 0)
- fatal(_("attempt to use function `%s' as an array"),
- save_symbol->vname);
- else if (isparam)
+ if (isparam)
fatal(_("attempt to use scalar parameter `%s' as an array"),
- save_symbol->vname);
+ save_symbol->vname);
else
fatal(_("attempt to use scalar `%s' as an array"),
- save_symbol->vname);
+ save_symbol->vname);
} else
break;
}
@@ -269,16 +379,18 @@ get_array(NODE *symbol, int canfatal)
void
set_SUBSEP()
{
- SUBSEP = force_string(SUBSEP_node->var_value)->stptr;
+ SUBSEP_node->var_value = force_string(SUBSEP_node->var_value);
+ SUBSEP = SUBSEP_node->var_value->stptr;
SUBSEPlen = SUBSEP_node->var_value->stlen;
-}
+}
+
/* concat_exp --- concatenate expression list into a single string */
NODE *
concat_exp(int nargs, int do_subsep)
{
- /* do_subsep is false for Node-concat */
+ /* do_subsep is FALSE for Op_concat */
NODE *r;
char *str;
char *s;
@@ -295,13 +407,14 @@ concat_exp(int nargs, int do_subsep)
len = 0;
for (i = 1; i <= nargs; i++) {
- r = POP();
+ r = TOP();
if (r->type == Node_var_array) {
while (--i > 0)
DEREF(args_array[i]); /* avoid memory leak */
fatal(_("attempt to use array `%s' in a scalar context"), array_vname(r));
- }
- args_array[i] = force_string(r);
+ }
+ r = POP_STRING();
+ args_array[i] = r;
len += r->stlen;
}
len += (nargs - 1) * subseplen;
@@ -325,247 +438,7 @@ concat_exp(int nargs, int do_subsep)
DEREF(r);
}
- return make_str_node(str, len, ALREADY_MALLOCED);
-}
-
-
-/* assoc_clear --- flush all the values in symbol[] */
-
-void
-assoc_clear(NODE *symbol)
-{
- long i;
- NODE *bucket, *next;
-
- if (symbol->var_array == NULL)
- return;
-
- for (i = 0; i < symbol->array_size; i++) {
- for (bucket = symbol->var_array[i]; bucket != NULL; bucket = next) {
- next = bucket->ahnext;
- if (bucket->ahvalue->type == Node_var_array) {
- NODE *r = bucket->ahvalue;
- assoc_clear(r); /* recursively clear all sub-arrays */
- efree(r->vname);
- freenode(r);
- } else
- unref(bucket->ahvalue);
-
- unref(bucket); /* unref() will free the ahname_str */
- }
- symbol->var_array[i] = NULL;
- }
- efree(symbol->var_array);
- symbol->var_array = NULL;
- symbol->array_size = symbol->table_size = 0;
- symbol->flags &= ~ARRAYMAXED;
-}
-
-/* awk_hash --- calculate the hash function of the string in subs */
-
-static unsigned long
-awk_hash(const char *s, size_t len, unsigned long hsize, size_t *code)
-{
- unsigned long h = 0;
- unsigned long htmp;
-
- /*
- * Ozan Yigit's original sdbm hash, copied from Margo Seltzers
- * db package.
- *
- * This is INCREDIBLY ugly, but fast. We break the string up into
- * 8 byte units. On the first time through the loop we get the
- * "leftover bytes" (strlen % 8). On every other iteration, we
- * perform 8 HASHC's so we handle all 8 bytes. Essentially, this
- * saves us 7 cmp & branch instructions. If this routine is
- * heavily used enough, it's worth the ugly coding.
- */
-
- /*
- * Even more speed:
- * #define HASHC h = *s++ + 65599 * h
- * Because 65599 = pow(2, 6) + pow(2, 16) - 1 we multiply by shifts
- *
- * 4/2011: Force the results to 32 bits, to get the same
- * result on both 32- and 64-bit systems. This may be a
- * bad idea.
- */
-#define HASHC htmp = (h << 6); \
- h = *s++ + htmp + (htmp << 10) - h ; \
- htmp &= 0xFFFFFFFF; \
- h &= 0xFFFFFFFF
-
- h = 0;
-
- /* "Duff's Device" */
- if (len > 0) {
- size_t loop = (len + 8 - 1) >> 3;
-
- switch (len & (8 - 1)) {
- case 0:
- do { /* All fall throughs */
- HASHC;
- case 7: HASHC;
- case 6: HASHC;
- case 5: HASHC;
- case 4: HASHC;
- case 3: HASHC;
- case 2: HASHC;
- case 1: HASHC;
- } while (--loop);
- }
- }
-
- if (code != NULL)
- *code = h;
-
- if (h >= hsize)
- h %= hsize;
- return h;
-}
-
-/* assoc_find --- locate symbol[subs] */
-
-static NODE * /* NULL if not found */
-assoc_find(NODE *symbol, NODE *subs, unsigned long hash1, NODE **last)
-{
- NODE *bucket, *prev;
- const char *s1_str;
- size_t s1_len;
- NODE *s2;
-
- for (prev = NULL, bucket = symbol->var_array[hash1]; bucket != NULL;
- prev = bucket, bucket = bucket->ahnext) {
- /*
- * This used to use cmp_nodes() here. That's wrong.
- * Array indices are strings; compare as such, always!
- */
- s1_str = bucket->ahname_str;
- s1_len = bucket->ahname_len;
- s2 = subs;
-
- if (s1_len == s2->stlen) {
- if (s1_len == 0 /* "" is a valid index */
- || memcmp(s1_str, s2->stptr, s1_len) == 0)
- break;
- }
- }
- if (last != NULL)
- *last = prev;
- return bucket;
-}
-
-/* in_array --- test whether the array element symbol[subs] exists or not,
- * return pointer to value if it does.
- */
-
-NODE *
-in_array(NODE *symbol, NODE *subs)
-{
- unsigned long hash1;
- NODE *ret;
-
- assert(symbol->type == Node_var_array);
-
- if (symbol->var_array == NULL)
- return NULL;
-
- hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size, NULL);
- ret = assoc_find(symbol, subs, hash1, NULL);
- return (ret ? ret->ahvalue : NULL);
-}
-
-/*
- * assoc_lookup:
- * Find SYMBOL[SUBS] in the assoc array. Install it with value "" if it
- * isn't there. Returns a pointer ala get_lhs to where its value is stored.
- *
- * SYMBOL is the address of the node (or other pointer) being dereferenced.
- * SUBS is a number or string used as the subscript.
- */
-
-NODE **
-assoc_lookup(NODE *symbol, NODE *subs, int reference)
-{
- unsigned long hash1;
- NODE *bucket;
- size_t code;
-
- assert(symbol->type == Node_var_array);
-
- (void) force_string(subs);
-
- if (symbol->var_array == NULL) {
- symbol->array_size = symbol->table_size = 0; /* sanity */
- symbol->flags &= ~ARRAYMAXED;
- grow_table(symbol);
- hash1 = hash(subs->stptr, subs->stlen,
- (unsigned long) symbol->array_size, & code);
- } else {
- hash1 = hash(subs->stptr, subs->stlen,
- (unsigned long) symbol->array_size, & code);
- bucket = assoc_find(symbol, subs, hash1, NULL);
- if (bucket != NULL)
- return &(bucket->ahvalue);
- }
-
- if (do_lint && reference) {
- lintwarn(_("reference to uninitialized element `%s[\"%.*s\"]'"),
- array_vname(symbol), (int)subs->stlen, subs->stptr);
- }
-
- /* It's not there, install it. */
- if (do_lint && subs->stlen == 0)
- lintwarn(_("subscript of array `%s' is null string"),
- array_vname(symbol));
-
- /* first see if we would need to grow the array, before installing */
- symbol->table_size++;
- if ((symbol->flags & ARRAYMAXED) == 0
- && (symbol->table_size / symbol->array_size) > AVG_CHAIN_MAX) {
- grow_table(symbol);
- /* have to recompute hash value for new size */
- hash1 = code % (unsigned long) symbol->array_size;
- }
-
- getnode(bucket);
- bucket->type = Node_ahash;
-
- /*
- * Freeze this string value --- it must never
- * change, no matter what happens to the value
- * that created it or to CONVFMT, etc.
- *
- * One day: Use an atom table to track array indices,
- * and avoid the extra memory overhead.
- */
- bucket->flags |= MALLOC;
- bucket->ahname_ref = 1;
-
- emalloc(bucket->ahname_str, char *, subs->stlen + 2, "assoc_lookup");
- bucket->ahname_len = subs->stlen;
- memcpy(bucket->ahname_str, subs->stptr, subs->stlen);
- bucket->ahname_str[bucket->ahname_len] = '\0';
- bucket->ahvalue = Nnull_string;
-
- bucket->ahnext = symbol->var_array[hash1];
- bucket->ahcode = code;
-
- /*
- * Set the numeric value for the index if it's available. Useful
- * for numeric sorting by index. Do this only if the numeric
- * value is available, instead of all the time, since doing it
- * all the time is a big performance hit for something that may
- * never be used.
- */
- if ((subs->flags & NUMCUR) != 0) {
- bucket->ahname_num = subs->numbr;
- bucket->flags |= NUMIND;
- }
-
- /* hook it into the symbol table */
- symbol->var_array[hash1] = bucket;
- return &(bucket->ahvalue);
+ return make_str_node(str, len);
}
@@ -602,7 +475,7 @@ adjust_fcall_stack(NODE *symbol, int nsubs)
func = frame_ptr->func_node;
if (func == NULL) /* in main */
return;
- pcount = func->lnode->param_cnt;
+ pcount = func->param_cnt;
sp = frame_ptr->stack;
for (; pcount > 0; pcount--) {
@@ -627,12 +500,10 @@ adjust_fcall_stack(NODE *symbol, int nsubs)
* function f(c, d) { delete c; ..}
* BEGIN { a[0][0] = 1; f(a[0], a[0]); ...}
*/
- char *save;
-local_array:
- save = r->vname;
- memset(r, '\0', sizeof(NODE));
- r->vname = save;
- r->type = Node_var_array;
+
+ init_array(r);
+ r->parent_array = NULL;
+ r->flags = 0;
continue;
}
@@ -648,8 +519,10 @@ local_array:
* BEGIN { a[0][0][0][0] = 1; f(a[0], a[0][0][0]); .. }
*
*/
-
- goto local_array;
+ init_array(r);
+ r->parent_array = NULL;
+ r->flags = 0;
+ break;
}
}
}
@@ -660,18 +533,17 @@ local_array:
/*
* `symbol' is array
- * `nsubs' is number of subscripts
+ * `nsubs' is no of subscripts
*/
void
do_delete(NODE *symbol, int nsubs)
{
- unsigned long hash1 = 0;
- NODE *subs, *bucket, *last, *r;
+ NODE *val, *subs;
int i;
assert(symbol->type == Node_var_array);
- subs = bucket = last = r = NULL; /* silence the compiler */
+ subs = val = NULL; /* silence the compiler */
/*
* The force_string() call is needed to make sure that
@@ -683,16 +555,17 @@ do_delete(NODE *symbol, int nsubs)
* Without it, the code does not fail.
*/
-#define free_subs(n) \
-do { \
+#define free_subs(n) do { \
NODE *s = PEEK(n - 1); \
if (s->type == Node_val) { \
- (void) force_string(s); /* may have side effects ? */ \
+ (void) force_string(s); /* may have side effects. */ \
DEREF(s); \
} \
} while (--n > 0)
- if (nsubs == 0) { /* delete array */
+ if (nsubs == 0) {
+ /* delete array */
+
adjust_fcall_stack(symbol, 0); /* fix function call stack; See above. */
assoc_clear(symbol);
return;
@@ -706,66 +579,46 @@ do { \
free_subs(i);
fatal(_("attempt to use array `%s' in a scalar context"), array_vname(subs));
}
- (void) force_string(subs);
- last = NULL; /* shut up gcc -Wall */
- hash1 = 0; /* ditto */
- bucket = NULL; /* array may be empty */
-
- if (symbol->var_array != NULL) {
- hash1 = hash(subs->stptr, subs->stlen,
- (unsigned long) symbol->array_size, NULL);
- bucket = assoc_find(symbol, subs, hash1, &last);
- }
-
- if (bucket == NULL) {
- if (do_lint)
+ val = in_array(symbol, subs);
+ if (val == NULL) {
+ if (do_lint) {
+ subs = force_string(subs);
lintwarn(_("delete: index `%s' not in array `%s'"),
subs->stptr, array_vname(symbol));
+ }
/* avoid memory leak, free all subs */
free_subs(i);
return;
}
if (i > 1) {
- if (bucket->ahvalue->type != Node_var_array) {
+ if (val->type != Node_var_array) {
/* e.g.: a[1] = 1; delete a[1][1] */
+
free_subs(i);
+ subs = force_string(subs);
fatal(_("attempt to use scalar `%s[\"%.*s\"]' as an array"),
array_vname(symbol),
- (int) bucket->ahname_len,
- bucket->ahname_str);
+ (int) subs->stlen,
+ subs->stptr);
}
- symbol = bucket->ahvalue;
+ symbol = val;
+ DEREF(subs);
}
- DEREF(subs);
}
- r = bucket->ahvalue;
- if (r->type == Node_var_array) {
- adjust_fcall_stack(r, nsubs); /* fix function call stack; See above. */
- assoc_clear(r);
+ if (val->type == Node_var_array) {
+ adjust_fcall_stack(val, nsubs); /* fix function call stack; See above. */
+ assoc_clear(val);
/* cleared a sub-array, free Node_var_array */
- efree(r->vname);
- freenode(r);
+ efree(val->vname);
+ freenode(val);
} else
- unref(r);
+ unref(val);
- if (last != NULL)
- last->ahnext = bucket->ahnext;
- else
- symbol->var_array[hash1] = bucket->ahnext;
-
- unref(bucket); /* unref() will free the ahname_str */
- symbol->table_size--;
- if (symbol->table_size <= 0) {
- symbol->table_size = symbol->array_size = 0;
- symbol->flags &= ~ARRAYMAXED;
- if (symbol->var_array != NULL) {
- efree(symbol->var_array);
- symbol->var_array = NULL;
- }
- }
+ (void) assoc_remove(symbol, subs);
+ DEREF(subs);
#undef free_subs
}
@@ -782,272 +635,159 @@ do { \
void
do_delete_loop(NODE *symbol, NODE **lhs)
{
- long i;
-
- assert(symbol->type == Node_var_array);
+ NODE **list;
+ NODE fl;
- if (symbol->var_array == NULL)
+ if (array_empty(symbol))
return;
- /* get first index value */
- for (i = 0; i < symbol->array_size; i++) {
- if (symbol->var_array[i] != NULL) {
- unref(*lhs);
- *lhs = make_string(symbol->var_array[i]->ahname_str,
- symbol->var_array[i]->ahname_len);
- break;
- }
- }
+ fl.flags = AINDEX|ADELETE; /* need a single index */
+ list = symbol->alist(symbol, & fl);
+ assert(list != NULL);
+
+ unref(*lhs);
+ *lhs = list[0];
+ efree(list);
/* blast the array in one shot */
- adjust_fcall_stack(symbol, 0);
+ adjust_fcall_stack(symbol, 0);
assoc_clear(symbol);
}
-/* grow_table --- grow a hash table */
+
+/* value_info --- print scalar node info */
+
+int PREC_NUM = -1;
+int PREC_STR = -1;
static void
-grow_table(NODE *symbol)
+value_info(NODE *n)
{
- NODE **old, **new, *chain, *next;
- int i, j;
- unsigned long hash1;
- unsigned long oldsize, newsize, k;
- /*
- * This is an array of primes. We grow the table by an order of
- * magnitude each time (not just doubling) so that growing is a
- * rare operation. We expect, on average, that it won't happen
- * more than twice. When things are very large (> 8K), we just
- * double more or less, instead of just jumping from 8K to 64K.
- */
- static const long sizes[] = {
- 13, 127, 1021, 8191, 16381, 32749, 65497, 131101, 262147,
- 524309, 1048583, 2097169, 4194319, 8388617, 16777259, 33554467,
- 67108879, 134217757, 268435459, 536870923, 1073741827
- };
-
- /* find next biggest hash size */
- newsize = oldsize = symbol->array_size;
- for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) {
- if (oldsize < sizes[i]) {
- newsize = sizes[i];
- break;
- }
- }
-
- if (newsize == oldsize) { /* table already at max (!) */
- symbol->flags |= ARRAYMAXED;
+ if (n == Nnull_string || n == Null_field) {
+ fprintf(output_fp, "<(null)>");
return;
}
- /* allocate new table */
- emalloc(new, NODE **, newsize * sizeof(NODE *), "grow_table");
- memset(new, '\0', newsize * sizeof(NODE *));
+ if ((n->flags & (STRING|STRCUR)) != 0) {
+ fprintf(output_fp, "<");
+ fprintf(output_fp, "\"%.*s\"", PREC_STR, n->stptr);
+ if ((n->flags & (NUMBER|NUMCUR)) != 0)
+ fprintf(output_fp, ":%.*g", PREC_NUM, n->numbr);
+ fprintf(output_fp, ">");
+ } else
+ fprintf(output_fp, "<%.*g>", PREC_NUM, n->numbr);
- /* brand new hash table, set things up and return */
- if (symbol->var_array == NULL) {
- symbol->table_size = 0;
- goto done;
- }
+ fprintf(output_fp, ":%s", flags2str(n->flags));
- /* old hash table there, move stuff to new, free old */
- old = symbol->var_array;
- for (k = 0; k < oldsize; k++) {
- if (old[k] == NULL)
- continue;
-
- for (chain = old[k]; chain != NULL; chain = next) {
- next = chain->ahnext;
- hash1 = chain->ahcode % newsize;
+ if ((n->flags & FIELD) == 0)
+ fprintf(output_fp, ":%ld", n->valref);
+ else
+ fprintf(output_fp, ":");
- /* remove from old list, add to new */
- chain->ahnext = new[hash1];
- new[hash1] = chain;
- }
+ if ((n->flags & (STRING|STRCUR)) == STRCUR) {
+ fprintf(output_fp, "][");
+ fprintf(output_fp, "stfmt=%d, ", n->stfmt);
+ fprintf(output_fp, "CONVFMT=\"%s\"", n->stfmt <= -1 ? "%ld"
+ : fmt_list[n->stfmt]->stptr);
}
- efree(old);
-
-done:
- /*
- * note that symbol->table_size does not change if an old array,
- * and is explicitly set to 0 if a new one.
- */
- symbol->var_array = new;
- symbol->array_size = newsize;
}
-/* pr_node --- print simple node info */
-static void
-pr_node(NODE *n)
+#ifdef ARRAYDEBUG
+
+NODE *
+do_aoption(int nargs)
{
- if ((n->flags & NUMBER) != 0)
- printf("%s %g p: %p", flags2str(n->flags), n->numbr, n);
- else
- printf("%s %.*s p: %p", flags2str(n->flags),
- (int) n->stlen, n->stptr, n);
+ int ret = -1;
+ NODE *opt, *val;
+ int i;
+ array_ptr *afunc;
+
+ val = POP_SCALAR();
+ opt = POP_SCALAR();
+ for (i = 0; i < num_atypes; i++) {
+ afunc = atypes[i];
+ if (afunc[NUM_AFUNCS] && (*afunc[NUM_AFUNCS])(opt, val) != NULL) {
+ ret = 0;
+ break;
+ }
+ }
+ DEREF(opt);
+ DEREF(val);
+ return make_number((AWKNUM) ret);
}
+#endif
-static void
+void
indent(int indent_level)
{
- int k;
- for (k = 0; k < indent_level; k++)
- putchar('\t');
+ int i;
+ for (i = 0; i < indent_level; i++)
+ fprintf(output_fp, "%s", indent_char);
}
-/* assoc_dump --- dump the contents of an array */
+/* assoc_info --- print index, value info */
-NODE *
-assoc_dump(NODE *symbol, int indent_level)
+void
+assoc_info(NODE *subs, NODE *val, NODE *ndump, const char *aname)
{
- long i;
- NODE *bucket;
+ int indent_level = ndump->alevel;
+ indent_level++;
indent(indent_level);
- if (symbol->var_array == NULL) {
- printf(_("%s: empty (null)\n"), symbol->vname);
- return make_number((AWKNUM) 0);
- }
-
- if (symbol->table_size == 0) {
- printf(_("%s: empty (zero)\n"), symbol->vname);
- return make_number((AWKNUM) 0);
- }
+ fprintf(output_fp, "I: [%s:", aname);
+ if ((subs->flags & INTIND) != 0)
+ fprintf(output_fp, "<%ld>", (long) subs->numbr);
+ else
+ value_info(subs);
+ fprintf(output_fp, "]\n");
- printf(_("%s: table_size = %d, array_size = %d\n"), symbol->vname,
- (int) symbol->table_size, (int) symbol->array_size);
-
- for (i = 0; i < symbol->array_size; i++) {
- for (bucket = symbol->var_array[i]; bucket != NULL;
- bucket = bucket->ahnext) {
- indent(indent_level);
- printf("%s: I: [len %d <%.*s> p: %p] V: [",
- symbol->vname,
- (int) bucket->ahname_len,
- (int) bucket->ahname_len,
- bucket->ahname_str,
- bucket->ahname_str);
- if (bucket->ahvalue->type == Node_var_array) {
- printf("\n");
- assoc_dump(bucket->ahvalue, indent_level + 1);
- indent(indent_level);
- } else
- pr_node(bucket->ahvalue);
- printf("]\n");
- }
+ indent(indent_level);
+ if (val->type == Node_val) {
+ fprintf(output_fp, "V: [scalar: ");
+ value_info(val);
+ } else {
+ fprintf(output_fp, "V: [");
+ ndump->alevel++;
+ ndump->adepth--;
+ assoc_dump(val, ndump);
+ ndump->adepth++;
+ ndump->alevel--;
+ indent(indent_level);
}
-
- return make_number((AWKNUM) 0);
+ fprintf(output_fp, "]\n");
}
+
/* do_adump --- dump an array: interface to assoc_dump */
NODE *
do_adump(int nargs)
{
- NODE *r, *a;
+ NODE *symbol, *tmp;
+ static NODE ndump;
+ long depth = 0;
- a = POP();
- if (a->type == Node_param_list) {
- printf(_("%s: is parameter\n"), a->vname);
- a = GET_PARAM(a->param_cnt);
- }
- if (a->type == Node_array_ref) {
- printf(_("%s: array_ref to %s\n"), a->vname,
- a->orig_array->vname);
- a = a->orig_array;
- }
- if (a->type != Node_var_array)
- fatal(_("adump: argument not an array"));
- r = assoc_dump(a, 0);
- return r;
-}
-
-/*
- * The following functions implement the builtin
- * asort function. Initial work by Alan J. Broder,
- * ajb@woti.com.
- */
-
-/* dup_table --- recursively duplicate input array "symbol" */
+ /* depth < 0, no index and value info.
+ * = 0, main array index and value info; does not descend into sub-arrays.
+ * > 0, descends into 'depth' sub-arrays, and prints index and value info.
+ */
-static NODE *
-dup_table(NODE *symbol, NODE *newsymb)
-{
- NODE **old, **new, *chain, *bucket;
- long i;
- unsigned long cursize;
-
- /* find the current hash size */
- cursize = symbol->array_size;
-
- new = NULL;
-
- /* input is a brand new hash table, so there's nothing to copy */
- if (symbol->var_array == NULL)
- newsymb->table_size = 0;
- else {
- /* old hash table there, dupnode stuff into a new table */
-
- /* allocate new table */
- emalloc(new, NODE **, cursize * sizeof(NODE *), "dup_table");
- memset(new, '\0', cursize * sizeof(NODE *));
-
- /* do the copying/dupnode'ing */
- old = symbol->var_array;
- for (i = 0; i < cursize; i++) {
- if (old[i] != NULL) {
- for (chain = old[i]; chain != NULL;
- chain = chain->ahnext) {
- /* get a node for the linked list */
- getnode(bucket);
- bucket->type = Node_ahash;
- bucket->flags |= MALLOC;
- bucket->ahname_ref = 1;
- bucket->ahcode = chain->ahcode;
- if ((chain->flags & NUMIND) != 0) {
- bucket->ahname_num = chain->ahname_num;
- bucket->flags |= NUMIND;
- }
-
- /*
- * copy the corresponding name and
- * value from the original input list
- */
- emalloc(bucket->ahname_str, char *, chain->ahname_len + 2, "dup_table");
- bucket->ahname_len = chain->ahname_len;
-
- memcpy(bucket->ahname_str, chain->ahname_str, chain->ahname_len);
- bucket->ahname_str[bucket->ahname_len] = '\0';
-
- if (chain->ahvalue->type == Node_var_array) {
- NODE *r;
- getnode(r);
- r->type = Node_var_array;
- r->vname = estrdup(chain->ahname_str, chain->ahname_len);
- r->parent_array = newsymb;
- bucket->ahvalue = dup_table(chain->ahvalue, r);
- } else
- bucket->ahvalue = dupnode(chain->ahvalue);
-
- /*
- * put the node on the corresponding
- * linked list in the new table
- */
- bucket->ahnext = new[i];
- new[i] = bucket;
- }
- }
- }
- newsymb->table_size = symbol->table_size;
+ if (nargs == 2) {
+ tmp = POP_SCALAR();
+ depth = (long) force_number(tmp);
+ DEREF(tmp);
}
-
- newsymb->var_array = new;
- newsymb->array_size = cursize;
- newsymb->flags = symbol->flags; /* ARRAYMAXED */
- return newsymb;
+ symbol = POP_PARAM();
+ if (symbol->type != Node_var_array)
+ fatal(_("adump: first argument not an array"));
+
+ ndump.type = Node_dump_array;
+ ndump.adepth = depth;
+ ndump.alevel = 0;
+ assoc_dump(symbol, & ndump);
+ return make_number((AWKNUM) 0);
}
@@ -1059,15 +799,13 @@ asort_actual(int nargs, SORT_CTXT ctxt)
NODE *array, *dest = NULL, *result;
NODE *r, *subs, *s;
NODE **list, **ptr;
-#define TSIZE 100 /* an arbitrary amount */
- static char buf[TSIZE+2];
unsigned long num_elems, i;
const char *sort_str;
if (nargs == 3) /* 3rd optional arg */
s = POP_STRING();
else
- s = Nnull_string; /* "" => default sorting */
+ s = dupnode(Nnull_string); /* "" => default sorting */
s = force_string(s);
sort_str = s->stptr;
@@ -1078,7 +816,6 @@ asort_actual(int nargs, SORT_CTXT ctxt)
sort_str = "@ind_str_asc";
}
-
if (nargs >= 2) { /* 2nd optional arg */
dest = POP_PARAM();
if (dest->type != Node_var_array) {
@@ -1107,15 +844,16 @@ asort_actual(int nargs, SORT_CTXT ctxt)
fatal(ctxt == ASORT ?
_("asort: cannot use a subarray of second arg for first arg") :
_("asorti: cannot use a subarray of second arg for first arg"));
- }
+ }
}
- num_elems = array->table_size;
- if (num_elems == 0 || array->var_array == NULL) { /* source array is empty */
+ if (array_empty(array)) {
+ /* source array is empty */
if (dest != NULL && dest != array)
assoc_clear(dest);
return make_number((AWKNUM) 0);
}
+ num_elems = array->table_size;
/* sorting happens inside assoc_list */
list = assoc_list(array, sort_str, ctxt);
@@ -1132,70 +870,52 @@ asort_actual(int nargs, SORT_CTXT ctxt)
result = dest;
} else {
/* use 'result' as a temporary destination array */
- getnode(result);
- memset(result, '\0', sizeof(NODE));
- result->type = Node_var_array;
+ result = make_array();
result->vname = array->vname;
result->parent_array = array->parent_array;
}
- subs = make_str_node(buf, TSIZE, ALREADY_MALLOCED); /* fake it */
- subs->flags &= ~MALLOC; /* safety */
- for (i = 1, ptr = list; i <= num_elems; i++) {
- sprintf(buf, "%lu", i);
- subs->stlen = strlen(buf);
- /* make number valid in case this array gets sorted later */
- subs->numbr = i;
- subs->flags |= NUMCUR;
- r = *ptr++;
- if (ctxt == ASORTI) {
- /*
- * We want the indices of the source array as values
- * of the 'result' array.
- */
- *assoc_lookup(result, subs, FALSE) =
- make_string(r->ahname_str, r->ahname_len);
- } else {
- NODE *val;
-
- /* We want the values of the source array. */
-
- val = r->ahvalue;
- if (result != dest) {
- /* optimization for dest = NULL or dest = array */
-
- if (val->type == Node_var_array) {
- /* update subarray index in parent array */
- efree(val->vname);
- val->vname = estrdup(subs->stptr, subs->stlen);
- }
- *assoc_lookup(result, subs, FALSE) = val;
- r->ahvalue = Nnull_string;
- } else {
- if (val->type == Node_val)
- *assoc_lookup(result, subs, FALSE) = dupnode(val);
- else {
- NODE *arr;
-
- /*
- * There isn't any reference counting for
- * subarrays, so recursively copy subarrays
- * using dup_table().
- */
- getnode(arr);
- arr->type = Node_var_array;
- arr->var_array = NULL;
- arr->vname = estrdup(subs->stptr, subs->stlen);
- arr->parent_array = array; /* actual parent, not the temporary one. */
- *assoc_lookup(result, subs, FALSE) = dup_table(val, arr);
- }
+ subs = make_number((AWKNUM) 0.0);
+
+ if (ctxt == ASORTI) {
+ /* We want the indices of the source array. */
+
+ for (i = 1, ptr = list; i <= num_elems; i++, ptr += 2) {
+ subs->numbr = (AWKNUM) i;
+ r = *ptr;
+ *assoc_lookup(result, subs) = r;
+ }
+ } else {
+ /* We want the values of the source array. */
+
+ for (i = 1, ptr = list; i <= num_elems; i++) {
+ subs->numbr = (AWKNUM) i;
+
+ /* free index node */
+ r = *ptr++;
+ unref(r);
+
+ /* value node */
+ r = *ptr++;
+
+ /* FIXME: asort(a) optimization */
+
+ if (r->type == Node_val)
+ *assoc_lookup(result, subs) = dupnode(r);
+ else {
+ NODE *arr;
+ arr = make_array();
+ subs = force_string(subs);
+ arr->vname = subs->stptr;
+ subs->stptr = NULL;
+ subs->flags &= ~STRCUR;
+ arr->parent_array = array; /* actual parent, not the temporary one. */
+ *assoc_lookup(result, subs) = assoc_copy(r, arr);
}
}
+ }
- unref(r);
- }
-
- freenode(subs); /* stptr(buf) not malloc-ed */
+ unref(subs);
efree(list);
if (result != dest) {
@@ -1209,7 +929,6 @@ asort_actual(int nargs, SORT_CTXT ctxt)
return make_number((AWKNUM) num_elems);
}
-#undef TSIZE
/* do_asort --- sort array by value */
@@ -1227,6 +946,7 @@ do_asorti(int nargs)
return asort_actual(nargs, ASORTI);
}
+
/*
* cmp_string --- compare two strings; logic similar to cmp_nodes() in eval.c
* except the extra case-sensitive comparison when the case-insensitive
@@ -1241,18 +961,10 @@ cmp_string(const NODE *n1, const NODE *n2)
int ret;
size_t lmin;
- assert(n1->type == n2->type);
- if (n1->type == Node_ahash) {
- s1 = n1->ahname_str;
- len1 = n1->ahname_len;
- s2 = n2->ahname_str;
- len2 = n2->ahname_len;
- } else {
- s1 = n1->stptr;
- len1 = n1->stlen;
- s2 = n2->stptr;
- len2 = n2->stlen;
- }
+ s1 = n1->stptr;
+ len1 = n1->stlen;
+ s2 = n2->stptr;
+ len2 = n2->stlen;
if (len1 == 0)
return len2 == 0 ? 0 : -1;
@@ -1303,7 +1015,7 @@ sort_up_index_string(const void *p1, const void *p2)
}
-/* sort_down_index_string --- descending index strings */
+/* sort_down_index_str --- qsort comparison function; descending index strings. */
static int
sort_down_index_string(const void *p1, const void *p2)
@@ -1326,24 +1038,23 @@ sort_down_index_string(const void *p1, const void *p2)
static int
sort_up_index_number(const void *p1, const void *p2)
{
- const NODE *n1, *n2;
+ const NODE *t1, *t2;
int ret;
- n1 = *((const NODE *const *) p1);
- n2 = *((const NODE *const *) p2);
+ t1 = *((const NODE *const *) p1);
+ t2 = *((const NODE *const *) p2);
- if (n1->ahname_num < n2->ahname_num)
+ if (t1->numbr < t2->numbr)
ret = -1;
else
- ret = (n1->ahname_num > n2->ahname_num);
+ ret = (t1->numbr > t2->numbr);
/* break a tie with the index string itself */
if (ret == 0)
- return cmp_string(n1, n2);
+ return cmp_string(t1, t2);
return ret;
}
-
/* sort_down_index_number --- qsort comparison function; descending index numbers */
static int
@@ -1359,29 +1070,23 @@ static int
sort_up_value_string(const void *p1, const void *p2)
{
const NODE *t1, *t2;
- NODE *n1, *n2;
-
- /* we're passed a pair of index (array subscript) nodes */
- t1 = *(const NODE *const *) p1;
- t2 = *(const NODE *const *) p2;
- /* and we want to compare the element values they refer to */
- n1 = t1->ahvalue;
- n2 = t2->ahvalue;
+ t1 = *((const NODE *const *) p1 + 1);
+ t2 = *((const NODE *const *) p2 + 1);
- if (n1->type == Node_var_array) {
- /* return 0 if n2 is a sub-array too, else return 1 */
- return (n2->type != Node_var_array);
+ if (t1->type == Node_var_array) {
+ /* return 0 if t2 is a sub-array too, else return 1 */
+ return (t2->type != Node_var_array);
}
- if (n2->type == Node_var_array)
- return -1; /* n1 (scalar) < n2 (sub-array) */
+ if (t2->type == Node_var_array)
+ return -1; /* t1 (scalar) < t2 (sub-array) */
- /* n1 and n2 both have string values; See sort_force_value_string(). */
- return cmp_string(n1, n2);
+ /* t1 and t2 both have string values */
+ return cmp_string(t1, t2);
}
-/* sort_down_value_string --- descending value string */
+/* sort_down_value_string --- qsort comparison function; descending value string */
static int
sort_down_value_string(const void *p1, const void *p2)
@@ -1389,50 +1094,46 @@ sort_down_value_string(const void *p1, const void *p2)
return -sort_up_value_string(p1, p2);
}
+
/* sort_up_value_number --- qsort comparison function; ascending value number */
static int
sort_up_value_number(const void *p1, const void *p2)
{
- const NODE *t1, *t2;
- NODE *n1, *n2;
+ NODE *t1, *t2;
int ret;
- /* we're passed a pair of index (array subscript) nodes */
- t1 = *(const NODE *const *) p1;
- t2 = *(const NODE *const *) p2;
-
- /* and we want to compare the element values they refer to */
- n1 = t1->ahvalue;
- n2 = t2->ahvalue;
+ t1 = *((NODE *const *) p1 + 1);
+ t2 = *((NODE *const *) p2 + 1);
- if (n1->type == Node_var_array) {
- /* return 0 if n2 is a sub-array too, else return 1 */
- return (n2->type != Node_var_array);
+ if (t1->type == Node_var_array) {
+ /* return 0 if t2 is a sub-array too, else return 1 */
+ return (t2->type != Node_var_array);
}
- if (n2->type == Node_var_array)
- return -1; /* n1 (scalar) < n2 (sub-array) */
+ if (t2->type == Node_var_array)
+ return -1; /* t1 (scalar) < t2 (sub-array) */
- /* n1 and n2 both Node_val, and force_number'ed */
- if (n1->numbr < n2->numbr)
+ /* t1 and t2 both Node_val, and force_number'ed */
+ if (t1->numbr < t2->numbr)
ret = -1;
else
- ret = (n1->numbr > n2->numbr);
+ ret = (t1->numbr > t2->numbr);
if (ret == 0) {
/*
* Use string value to guarantee same sort order on all
* versions of qsort().
*/
- n1 = force_string(n1);
- n2 = force_string(n2);
- ret = cmp_string(n1, n2);
+ t1 = force_string(t1);
+ t2 = force_string(t2);
+ ret = cmp_string(t1, t2);
}
return ret;
}
-/* sort_down_value_number --- descending value number */
+
+/* sort_down_value_number --- qsort comparison function; descending value number */
static int
sort_down_value_number(const void *p1, const void *p2)
@@ -1440,21 +1141,17 @@ sort_down_value_number(const void *p1, const void *p2)
return -sort_up_value_number(p1, p2);
}
+
/* sort_up_value_type --- qsort comparison function; ascending value type */
static int
sort_up_value_type(const void *p1, const void *p2)
{
- const NODE *t1, *t2;
NODE *n1, *n2;
- /* we're passed a pair of index (array subscript) nodes */
- t1 = *(const NODE *const *) p1;
- t2 = *(const NODE *const *) p2;
-
- /* and we want to compare the element values they refer to */
- n1 = t1->ahvalue;
- n2 = t2->ahvalue;
+ /* we want to compare the element values */
+ n1 = *((NODE *const *) p1 + 1);
+ n2 = *((NODE *const *) p2 + 1);
/* 1. Arrays vs. scalar, scalar is less than array */
if (n1->type == Node_var_array) {
@@ -1472,6 +1169,12 @@ sort_up_value_type(const void *p1, const void *p2)
if ((n2->flags & MAYBE_NUM) != 0)
(void) force_number(n2);
+ /* 2.5. Resolve INTIND, so that is STRING, and not NUMBER */
+ if ((n1->flags & INTIND) != 0)
+ (void) force_string(n1);
+ if ((n2->flags & INTIND) != 0)
+ (void) force_string(n2);
+
if ((n1->flags & NUMBER) != 0 && (n2->flags & NUMBER) != 0) {
if (n1->numbr < n2->numbr)
return -1;
@@ -1492,7 +1195,7 @@ sort_up_value_type(const void *p1, const void *p2)
return cmp_string(n1, n2);
}
-/* sort_down_value_type --- descending value type */
+/* sort_down_value_type --- qsort comparison function; descending value type */
static int
sort_down_value_type(const void *p1, const void *p2)
@@ -1505,27 +1208,26 @@ sort_down_value_type(const void *p1, const void *p2)
static int
sort_user_func(const void *p1, const void *p2)
{
- const NODE *t1, *t2;
NODE *idx1, *idx2, *val1, *val2;
AWKNUM ret;
INSTRUCTION *code;
extern int exiting;
- t1 = *((const NODE *const *) p1);
- t2 = *((const NODE *const *) p2);
-
- idx1 = make_string(t1->ahname_str, t1->ahname_len);
- idx2 = make_string(t2->ahname_str, t2->ahname_len);
- val1 = t1->ahvalue;
- val2 = t2->ahvalue;
+ idx1 = *((NODE *const *) p1);
+ idx2 = *((NODE *const *) p2);
+ val1 = *((NODE *const *) p1 + 1);
+ val2 = *((NODE *const *) p2 + 1);
code = TOP()->code_ptr; /* comparison function call instructions */
/* setup 4 arguments to comp_func() */
+ UPREF(idx1);
PUSH(idx1);
if (val1->type == Node_val)
UPREF(val1);
PUSH(val1);
+
+ UPREF(idx2);
PUSH(idx2);
if (val2->type == Node_val)
UPREF(val2);
@@ -1543,113 +1245,73 @@ sort_user_func(const void *p1, const void *p2)
return (ret < 0.0) ? -1 : (ret > 0.0);
}
-/* sort_force_index_number -- pre-process list items for sorting indices as numbers */
-
-static void
-sort_force_index_number(NODE **list, size_t num_elems)
-{
- size_t i;
- NODE *r;
- static NODE temp_node;
-
- for (i = 0; i < num_elems; i++) {
- r = list[i];
-
- if ((r->flags & NUMIND) != 0) /* once in a lifetime is plenty */
- continue;
- temp_node.type = Node_val;
- temp_node.stptr = r->ahname_str;
- temp_node.stlen = r->ahname_len;
- temp_node.flags = 0; /* only interested in the return value of r_force_number */
- r->ahname_num = r_force_number(& temp_node);
- r->flags |= NUMIND;
- }
-}
-
-/* sort_force_value_number -- pre-process list items for sorting values as numbers */
-
-static void
-sort_force_value_number(NODE **list, size_t num_elems)
-{
- size_t i;
- NODE *r, *val;
-
- for (i = 0; i < num_elems; i++) {
- r = list[i];
- val = r->ahvalue;
- if (val->type == Node_val)
- (void) force_number(val);
- }
-}
-
-/* sort_force_value_string -- pre-process list items for sorting values as strings */
-
-static void
-sort_force_value_string(NODE **list, size_t num_elems)
-{
- size_t i;
- NODE *r, *val;
-
- for (i = 0; i < num_elems; i++) {
- r = list[i];
- val = r->ahvalue;
- if (val->type == Node_val)
- r->ahvalue = force_string(val);
- }
-}
/* assoc_list -- construct, and optionally sort, a list of array elements */
NODE **
-assoc_list(NODE *array, const char *sort_str, SORT_CTXT sort_ctxt)
+assoc_list(NODE *symbol, const char *sort_str, SORT_CTXT sort_ctxt)
{
- typedef void (*qsort_prefunc)(NODE **, size_t);
typedef int (*qsort_compfunc)(const void *, const void *);
static const struct qsort_funcs {
const char *name;
qsort_compfunc comp_func;
- qsort_prefunc pre_func; /* pre-processing of list items */
+ enum assoc_list_flags flags;
} sort_funcs[] = {
- { "@ind_str_asc", sort_up_index_string, 0 },
- { "@ind_num_asc", sort_up_index_number, sort_force_index_number },
- { "@val_str_asc", sort_up_value_string, sort_force_value_string },
- { "@val_num_asc", sort_up_value_number, sort_force_value_number },
- { "@ind_str_desc", sort_down_index_string, 0 },
- { "@ind_num_desc", sort_down_index_number, sort_force_index_number },
- { "@val_str_desc", sort_down_value_string, sort_force_value_string },
- { "@val_num_desc", sort_down_value_number, sort_force_value_number },
- { "@val_type_asc", sort_up_value_type, 0 },
- { "@val_type_desc", sort_down_value_type, 0 },
- { "@unsorted", 0, 0 },
- };
+{ "@ind_str_asc", sort_up_index_string, AINDEX|AISTR|AASC },
+{ "@ind_num_asc", sort_up_index_number, AINDEX|AINUM|AASC },
+{ "@val_str_asc", sort_up_value_string, AVALUE|AVSTR|AASC },
+{ "@val_num_asc", sort_up_value_number, AVALUE|AVNUM|AASC },
+{ "@ind_str_desc", sort_down_index_string, AINDEX|AISTR|ADESC },
+{ "@ind_num_desc", sort_down_index_number, AINDEX|AINUM|ADESC },
+{ "@val_str_desc", sort_down_value_string, AVALUE|AVSTR|ADESC },
+{ "@val_num_desc", sort_down_value_number, AVALUE|AVNUM|ADESC },
+{ "@val_type_asc", sort_up_value_type, AVALUE|AASC },
+{ "@val_type_desc", sort_down_value_type, AVALUE|ADESC },
+{ "@unsorted", 0, AINDEX },
+};
+
+ /* N.B.: AASC and ADESC are hints to the specific array types.
+ * See cint_list() in cint_array.c.
+ */
+
NODE **list;
- NODE *r;
- size_t num_elems, i, j;
+ NODE fl;
+ unsigned long num_elems, j;
+ int elem_size, qi;
qsort_compfunc cmp_func = 0;
- qsort_prefunc pre_func = 0;
INSTRUCTION *code = NULL;
- int qi;
extern int currule;
- num_elems = array->table_size;
+ num_elems = symbol->table_size;
assert(num_elems > 0);
+ elem_size = 1;
+ fl.flags = 0;
+
for (qi = 0, j = sizeof(sort_funcs)/sizeof(sort_funcs[0]); qi < j; qi++) {
if (strcmp(sort_funcs[qi].name, sort_str) == 0)
break;
}
- if (qi >= 0 && qi < j) {
+ if (qi < j) {
cmp_func = sort_funcs[qi].comp_func;
- pre_func = sort_funcs[qi].pre_func;
+ fl.flags = sort_funcs[qi].flags;
+
+ if (symbol->array_funcs != cint_array_func)
+ fl.flags &= ~(AASC|ADESC);
- } else { /* unrecognized */
+ if (sort_ctxt != SORTED_IN || (fl.flags & AVALUE) != 0) {
+ /* need index and value pair in the list */
+
+ fl.flags |= (AINDEX|AVALUE);
+ elem_size = 2;
+ }
+
+ } else { /* unrecognized */
NODE *f;
const char *sp;
- assert(sort_str != NULL);
-
for (sp = sort_str; *sp != '\0'
&& ! isspace((unsigned char) *sp); sp++)
continue;
@@ -1663,7 +1325,10 @@ assoc_list(NODE *array, const char *sort_str, SORT_CTXT sort_ctxt)
fatal(_("sort comparison function `%s' is not defined"), sort_str);
cmp_func = sort_user_func;
- /* pre_func is still NULL */
+
+ /* need index and value pair in the list */
+ fl.flags |= (AVALUE|AINDEX);
+ elem_size = 2;
/* make function call instructions */
code = bcalloc(Op_func_call, 2, 0);
@@ -1683,23 +1348,12 @@ assoc_list(NODE *array, const char *sort_str, SORT_CTXT sort_ctxt)
PUSH_CODE(code);
}
- /* allocate space for array; the extra space is used in for(i in a) opcode (eval.c) */
- emalloc(list, NODE **, (num_elems + 1) * sizeof(NODE *), "assoc_list");
-
- /* populate it */
- for (i = j = 0; i < array->array_size; i++)
- for (r = array->var_array[i]; r != NULL; r = r->ahnext)
- list[j++] = dupnode(r);
- list[num_elems] = NULL;
+ list = symbol->alist(symbol, & fl);
- if (! cmp_func) /* unsorted */
- return list;
+ if (! cmp_func || (fl.flags & (AASC|ADESC)) != 0)
+ return list; /* unsorted or list already sorted */
- /* special pre-processing of list items */
- if (pre_func)
- pre_func(list, num_elems);
-
- qsort(list, num_elems, sizeof(NODE *), cmp_func); /* shazzam! */
+ qsort(list, num_elems, elem_size * sizeof(NODE *), cmp_func); /* shazzam! */
if (cmp_func == sort_user_func) {
code = POP_CODE();
@@ -1708,72 +1362,17 @@ assoc_list(NODE *array, const char *sort_str, SORT_CTXT sort_ctxt)
bcfree(code); /* Op_func_call */
}
- return list;
-}
-
-
-/*
-From bonzini@gnu.org Mon Oct 28 16:05:26 2002
-Date: Mon, 28 Oct 2002 13:33:03 +0100
-From: Paolo Bonzini <bonzini@gnu.org>
-To: arnold@skeeve.com
-Subject: Hash function
-Message-ID: <20021028123303.GA6832@biancaneve>
-
-Here is the hash function I'm using in GNU Smalltalk. The scrambling is
-needed if you use powers of two as the table sizes. If you use primes it
-is not needed.
-
-To use double-hashing with power-of-two size, you should use the
-_gst_hash_string(str, len) as the primary hash and
-scramble(_gst_hash_string (str, len)) | 1 as the secondary hash.
-
-Paolo
-
-*/
-/*
- * ADR: Slightly modified to work w/in the context of gawk.
- */
-
-static unsigned long
-gst_hash_string(const char *str, size_t len, unsigned long hsize, size_t *code)
-{
- unsigned long hashVal = 1497032417; /* arbitrary value */
- unsigned long ret;
-
- while (len--) {
- hashVal += *str++;
- hashVal += (hashVal << 10);
- hashVal ^= (hashVal >> 6);
- }
-
- ret = scramble(hashVal);
-
- if (code != NULL)
- *code = ret;
-
- if (ret >= hsize)
- ret %= hsize;
-
- return ret;
-}
+ if (sort_ctxt == SORTED_IN
+ && (fl.flags & (AINDEX|AVALUE)) == (AINDEX|AVALUE)
+ ) {
+ /* relocate all index nodes to the first half of the list. */
+ for (j = 1; j < num_elems; j++)
+ list[j] = list[2 * j];
-static unsigned long
-scramble(unsigned long x)
-{
- if (sizeof(long) == 4) {
- int y = ~x;
+ /* give back extra memory */
- x += (y << 10) | (y >> 22);
- x += (x << 6) | (x >> 26);
- x -= (x << 16) | (x >> 16);
- } else {
- x ^= (~x) >> 31;
- x += (x << 21) | (x >> 11);
- x += (x << 5) | (x >> 27);
- x += (x << 27) | (x >> 5);
- x += (x << 31);
+ erealloc(list, NODE **, num_elems * sizeof(NODE *), "assoc_list");
}
- return x;
+ return list;
}