Speed/memory performance improvements.

author: john haque <j.eh@mchsi.com> 2011-08-20 08:28:02 -0500
committer: john haque <j.eh@mchsi.com> 2011-10-12 07:33:05 -0500
commit: 1fea520248b42ca995c8797698c60301ea42ffe9 (patch)
tree: 1aa80c13392c25aa6bf3eb931fec9c83a0621e25 /array.c
parent: f757e147f1ae8254669b3222bc24a39ee8ff9b8f (diff)
download: gawk-1fea520248b42ca995c8797698c60301ea42ffe9.tar.gz
1 files changed, 489 insertions, 890 deletions
diff --git a/array.c b/array.c
index 82e99a4b..91b87572 100644
--- a/array.c
+++ b/array.c
@@ -1,5 +1,5 @@
 /*
- * array.c - routines for associative arrays.
+ * array.c - routines for awk arrays.
  */
 
 /* 
@@ -25,69 +25,190 @@
 
 #include "awk.h"
 
-/*
- * Tree walks (``for (iggy in foo)'') and array deletions use expensive
- * linear searching.  So what we do is start out with small arrays and
- * grow them as needed, so that our arrays are hopefully small enough,
- * most of the time, that they're pretty full and we're not looking at
- * wasted space.
- *
- * The decision is made to grow the array if the average chain length is
- * ``too big''. This is defined as the total number of entries in the table
- * divided by the size of the array being greater than some constant.
- *
- * We make the constant a variable, so that it can be tweaked
- * via environment variable.
- */
-
-static size_t AVG_CHAIN_MAX = 2;	/* Modern machines are bigger, reduce this from 10. */
+extern FILE *output_fp;
+extern NODE **fmt_list;          /* declared in eval.c */
+extern array_ptr str_array_func[];
+extern array_ptr cint_array_func[];
+extern array_ptr int_array_func[];
 
 static size_t SUBSEPlen;
 static char *SUBSEP;
+static char indent_char[] = "    ";
 
-static NODE *assoc_find(NODE *symbol, NODE *subs, unsigned long hash1, NODE **last);
-static void grow_table(NODE *symbol);
+static NODE **e_lookup(NODE *symbol, NODE *subs);
+static array_ptr empty_array_func[NUM_AFUNCS] = {
+	(array_ptr) 0,
+	(array_ptr) 0,
+	e_lookup,
+};
 
-static unsigned long gst_hash_string(const char *str, size_t len, unsigned long hsize, size_t *code);
-static unsigned long scramble(unsigned long x);
-static unsigned long awk_hash(const char *s, size_t len, unsigned long hsize, size_t *code);
+#define MAX_ATYPE 10
 
-unsigned long (*hash)(const char *s, size_t len, unsigned long hsize, size_t *code) = awk_hash;
+static array_ptr *atypes[MAX_ATYPE];
+static int num_atypes = 0;
 
-/* qsort comparison function */
-static int sort_up_index_string(const void *, const void *);
-static int sort_down_index_string(const void *, const void *);
-static int sort_up_index_number(const void *, const void *);
-static int sort_down_index_number(const void *, const void *);
-static int sort_up_value_string(const void *, const void *);
-static int sort_down_value_string(const void *, const void *);
-static int sort_up_value_number(const void *, const void *);
-static int sort_down_value_number(const void *, const void *);
-static int sort_up_value_type(const void *, const void *);
-static int sort_down_value_type(const void *, const void *);
+/*
+ *	index 0 : initialization.
+ *	index 1 : check if index is compatible.
+ *	index 8 : array dump, memory and other statistics (do_adump).
+ *                Also used by debugger 'examine' command.
+ */
+ 
+
+int
+register_array_func(array_ptr *afunc)
+{
+	if (afunc && num_atypes < MAX_ATYPE) {
+		if (afunc != str_array_func && ! afunc[1])
+			return FALSE;
+		atypes[num_atypes++] = afunc;
+		if (afunc[0])	/* execute init routine if any */
+			(void) (*afunc[0])(NULL, NULL);
+		return TRUE;
+	}
+	return FALSE;
+}
 
-/* array_init --- check relevant environment variables */
+/* array_init --- register all builtin array types */
 
 void
 array_init()
 {
-	const char *val;
-	char *endptr;
-	size_t newval;
-
-	if ((val = getenv("AVG_CHAIN_MAX")) != NULL && isdigit((unsigned char) *val)) {
-		newval = strtoul(val, & endptr, 10);
-		if (endptr != val && newval > 0)
-			AVG_CHAIN_MAX = newval;
+	(void) register_array_func(str_array_func);	/* the default */
+	(void) register_array_func(int_array_func);
+	(void) register_array_func(cint_array_func);
+}
+
+/* make_array --- create an array node */
+
+NODE *
+make_array()
+{
+	NODE *array;
+	getnode(array);
+	memset(array, '\0', sizeof(NODE));
+	array->type = Node_var_array;
+	array->array_funcs = empty_array_func;
+	/* vname, flags, and parent_array not set here */
+
+	return array;
+}		
+
+
+/* init_array --- initialize an array node */
+
+void
+init_array(NODE *symbol)
+{
+	symbol->type = Node_var_array;
+	symbol->array_funcs = empty_array_func;
+	symbol->buckets = NULL;
+	symbol->table_size = symbol->array_size = 0;
+	symbol->array_capacity = 0;
+
+	assert(symbol->xarray == NULL);
+	/* symbol->xarray = NULL; */
+
+	/* flags, vname, parent_array not (re)initialized */
+}
+
+
+/* e_lookup: assign type to an empty array. */
+
+static NODE **
+e_lookup(NODE *symbol, NODE *subs)
+{
+	int i;
+	array_ptr *afunc = NULL;
+
+	assert(array_empty(symbol) == TRUE);
+
+	/* Check which array type wants to accept this sub; traverse
+	 * array type list in reverse order.
+	 */
+	for (i = num_atypes - 1; i >= 1; i--) {
+		afunc = atypes[i];
+		if (afunc[1](symbol, subs) != NULL)
+			break;
+	}
+	if (i == 0 || afunc == NULL)
+		afunc = atypes[0];	/* default is str_array_func */
+	symbol->array_funcs = afunc;
+
+	/* We have the right type of array; install the subscript */
+	return symbol->alookup(symbol, subs);
+}
+
+
+/* assoc_clear --- flush all the values in symbol[] */
+
+void
+assoc_clear(NODE *symbol)
+{
+	if (! array_empty(symbol))
+		(void) symbol->aclear(symbol, NULL);
+}
+
+
+/* r_in_array --- test whether the array element symbol[subs] exists or not,
+ *	return pointer to value if it does.
+ */
+
+NODE *
+r_in_array(NODE *symbol, NODE *subs)
+{
+	NODE **ret;
+
+	if (array_empty(symbol))
+		return NULL;
+	ret = symbol->aexists(symbol, subs);
+	return (ret ? *ret : NULL);
+}
+
+
+/* assoc_remove --- remove an index from symbol[] */
+
+int
+assoc_remove(NODE *symbol, NODE *subs)
+{
+	if (array_empty(symbol))
+		return FALSE;
+	return (symbol->aremove(symbol, subs) != NULL);
+}
+
+
+/* assoc_copy --- duplicate input array "symbol" */
+
+NODE *
+assoc_copy(NODE *symbol, NODE *newsymb)
+{
+	assert(newsymb->vname != NULL);
+
+	assoc_clear(newsymb);
+	if (! array_empty(symbol)) {
+		(void) symbol->acopy(symbol, newsymb);
+		newsymb->array_funcs = symbol->array_funcs;
+		newsymb->flags = symbol->flags;
 	}
+	return newsymb;
+}
 
-	if ((val = getenv("AWK_HASH")) != NULL && strcmp(val, "gst") == 0)
-		hash = gst_hash_string; 
+
+/* assoc_dump --- dump array */
+
+void
+assoc_dump(NODE *symbol, NODE *ndump)
+{
+	if (array_empty(symbol))
+		fprintf(output_fp, "array `%s' is empty\n", array_vname(symbol));
+	else if (symbol->adump)	
+		(void) symbol->adump(symbol, ndump);
 }
 
+
 /* make_aname --- construct a 'vname' for a (sub)array */
 
-static char *
+const char *
 make_aname(const NODE *symbol)
 {
 	static char *aname = NULL;
@@ -115,10 +236,11 @@ make_aname(const NODE *symbol)
 			erealloc(aname, char *, (max_alen + 1) * sizeof(char *), "make_aname");
 		}
 		memcpy(aname, symbol->vname, alen + 1);
-	} 
+	}
 	return aname;
-#undef SLEN
 }
+#undef SLEN
+
 
 /*
  * array_vname --- print the name of the array
@@ -128,7 +250,7 @@ make_aname(const NODE *symbol)
  * to save it, they have to make a copy.
  */
 
-char *
+const char *
 array_vname(const NODE *symbol)
 {
 	static char *message = NULL;
@@ -164,7 +286,6 @@ array_vname(const NODE *symbol)
 	else
 		aname = make_aname(symbol);
 	len += strlen(aname);
-
 	/*
 	 * Each node contributes by strlen(from) minus the length
 	 * of "%s" in the translation (which is at least 2)
@@ -218,7 +339,7 @@ get_array(NODE *symbol, int canfatal)
 	NODE *save_symbol = symbol;
 	int isparam = FALSE;
 
-	if (symbol->type == Node_param_list && (symbol->flags & FUNC) == 0) {
+	if (symbol->type == Node_param_list) {
 		save_symbol = symbol = GET_PARAM(symbol->param_cnt);
 		isparam = TRUE;
 		if (symbol->type == Node_array_ref)
@@ -227,35 +348,24 @@ get_array(NODE *symbol, int canfatal)
 
 	switch (symbol->type) {
 	case Node_var_new:
-		symbol->type = Node_var_array;
-		symbol->var_array = NULL;
+		init_array(symbol);
 		symbol->parent_array = NULL;	/* main array has no parent */
 		/* fall through */
 	case Node_var_array:
 		break;
 
 	case Node_array_ref:
-	case Node_param_list:
-		if ((symbol->flags & FUNC) == 0)
-			cant_happen();
-		/* else
-			fall through */
-
 	default:
-		/* notably Node_var but catches also e.g. FS[1] = "x" */
+		/* notably Node_var but catches also e.g. a[1] = "x"; a[1][1] = "y" */
 		if (canfatal) {
 			if (symbol->type == Node_val)
 				fatal(_("attempt to use a scalar value as array"));
-
-			if ((symbol->flags & FUNC) != 0)
-				fatal(_("attempt to use function `%s' as an array"),
-								save_symbol->vname);
-			else if (isparam)
+			if (isparam)
 				fatal(_("attempt to use scalar parameter `%s' as an array"),
-								save_symbol->vname);
+					save_symbol->vname);
 			else
 				fatal(_("attempt to use scalar `%s' as an array"),
-								save_symbol->vname);
+					save_symbol->vname);
 		} else
 			break;
 	}
@@ -269,16 +379,18 @@ get_array(NODE *symbol, int canfatal)
 void
 set_SUBSEP()
 {
-	SUBSEP = force_string(SUBSEP_node->var_value)->stptr;
+	SUBSEP_node->var_value = force_string(SUBSEP_node->var_value);
+	SUBSEP = SUBSEP_node->var_value->stptr;
 	SUBSEPlen = SUBSEP_node->var_value->stlen;
-}                     
+}
+
 
 /* concat_exp --- concatenate expression list into a single string */
 
 NODE *
 concat_exp(int nargs, int do_subsep)
 {
-	/* do_subsep is false for Node-concat */
+	/* do_subsep is FALSE for Op_concat */
 	NODE *r;
 	char *str;
 	char *s;
@@ -295,13 +407,14 @@ concat_exp(int nargs, int do_subsep)
 
 	len = 0;
 	for (i = 1; i <= nargs; i++) {
-		r = POP();
+		r = TOP();
 		if (r->type == Node_var_array) {
 			while (--i > 0)
 				DEREF(args_array[i]);	/* avoid memory leak */
 			fatal(_("attempt to use array `%s' in a scalar context"), array_vname(r));
-		} 
-		args_array[i] = force_string(r);
+		}
+		r = POP_STRING();
+		args_array[i] = r;
 		len += r->stlen;
 	}
 	len += (nargs - 1) * subseplen;
@@ -325,247 +438,7 @@ concat_exp(int nargs, int do_subsep)
 		DEREF(r);
 	}
 
-	return make_str_node(str, len, ALREADY_MALLOCED);
-}
-
-
-/* assoc_clear --- flush all the values in symbol[] */
-
-void
-assoc_clear(NODE *symbol)
-{
-	long i;
-	NODE *bucket, *next;
-
-	if (symbol->var_array == NULL)
-		return;
-
-	for (i = 0; i < symbol->array_size; i++) {
-		for (bucket = symbol->var_array[i]; bucket != NULL; bucket = next) {
-			next = bucket->ahnext;
-			if (bucket->ahvalue->type == Node_var_array) {
-				NODE *r = bucket->ahvalue;
-				assoc_clear(r);		/* recursively clear all sub-arrays */
-				efree(r->vname);			
-				freenode(r);
-			} else
-				unref(bucket->ahvalue);
-
-			unref(bucket);	/* unref() will free the ahname_str */
-		}
-		symbol->var_array[i] = NULL;
-	}
-	efree(symbol->var_array);
-	symbol->var_array = NULL;
-	symbol->array_size = symbol->table_size = 0;
-	symbol->flags &= ~ARRAYMAXED;
-}
-
-/* awk_hash --- calculate the hash function of the string in subs */
-
-static unsigned long
-awk_hash(const char *s, size_t len, unsigned long hsize, size_t *code)
-{
-	unsigned long h = 0;
-	unsigned long htmp;
-
-	/*
-	 * Ozan Yigit's original sdbm hash, copied from Margo Seltzers
-	 * db package.
-	 *
-	 * This is INCREDIBLY ugly, but fast.  We break the string up into
-	 * 8 byte units.  On the first time through the loop we get the
-	 * "leftover bytes" (strlen % 8).  On every other iteration, we
-	 * perform 8 HASHC's so we handle all 8 bytes.  Essentially, this
-	 * saves us 7 cmp & branch instructions.  If this routine is
-	 * heavily used enough, it's worth the ugly coding.
-	 */
-
-	/*
-	 * Even more speed:
-	 * #define HASHC   h = *s++ + 65599 * h
-	 * Because 65599 = pow(2, 6) + pow(2, 16) - 1 we multiply by shifts
-	 *
-	 * 4/2011: Force the results to 32 bits, to get the same
-	 * result on both 32- and 64-bit systems. This may be a
-	 * bad idea.
-	 */
-#define HASHC   htmp = (h << 6);  \
-		h = *s++ + htmp + (htmp << 10) - h ; \
-		htmp &= 0xFFFFFFFF; \
-		h &= 0xFFFFFFFF
-
-	h = 0;
-
-	/* "Duff's Device" */
-	if (len > 0) {
-		size_t loop = (len + 8 - 1) >> 3;
-
-		switch (len & (8 - 1)) {
-		case 0:
-			do {	/* All fall throughs */
-				HASHC;
-		case 7:		HASHC;
-		case 6:		HASHC;
-		case 5:		HASHC;
-		case 4:		HASHC;
-		case 3:		HASHC;
-		case 2:		HASHC;
-		case 1:		HASHC;
-			} while (--loop);
-		}
-	}
-
-	if (code != NULL)
-		*code = h;
-
-	if (h >= hsize)
-		h %= hsize;
-	return h;
-}
-
-/* assoc_find --- locate symbol[subs] */
-
-static NODE *				/* NULL if not found */
-assoc_find(NODE *symbol, NODE *subs, unsigned long hash1, NODE **last)
-{
-	NODE *bucket, *prev;
-	const char *s1_str;
-	size_t s1_len;
-	NODE *s2;
-
-	for (prev = NULL, bucket = symbol->var_array[hash1]; bucket != NULL;
-			prev = bucket, bucket = bucket->ahnext) {
-		/*
-		 * This used to use cmp_nodes() here.  That's wrong.
-		 * Array indices are strings; compare as such, always!
-		 */
-		s1_str = bucket->ahname_str;
-		s1_len = bucket->ahname_len;
-		s2 = subs;
-
-		if (s1_len == s2->stlen) {
-			if (s1_len == 0		/* "" is a valid index */
-			    || memcmp(s1_str, s2->stptr, s1_len) == 0)
-				break;
-		}
-	}
-	if (last != NULL)
-		*last = prev;
-	return bucket;
-}
-
-/* in_array --- test whether the array element symbol[subs] exists or not,
- * 		return pointer to value if it does.
- */
-
-NODE *
-in_array(NODE *symbol, NODE *subs)
-{
-	unsigned long hash1;
-	NODE *ret;
-
-	assert(symbol->type == Node_var_array);
-
-	if (symbol->var_array == NULL)
-		return NULL;
-
-	hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size, NULL);
-	ret = assoc_find(symbol, subs, hash1, NULL);
-	return (ret ? ret->ahvalue : NULL);
-}
-
-/*
- * assoc_lookup:
- * Find SYMBOL[SUBS] in the assoc array.  Install it with value "" if it
- * isn't there. Returns a pointer ala get_lhs to where its value is stored.
- *
- * SYMBOL is the address of the node (or other pointer) being dereferenced.
- * SUBS is a number or string used as the subscript. 
- */
-
-NODE **
-assoc_lookup(NODE *symbol, NODE *subs, int reference)
-{
-	unsigned long hash1;
-	NODE *bucket;
-	size_t code;
-
-	assert(symbol->type == Node_var_array);
-
-	(void) force_string(subs);
-
-	if (symbol->var_array == NULL) {
-		symbol->array_size = symbol->table_size = 0;	/* sanity */
-		symbol->flags &= ~ARRAYMAXED;
-		grow_table(symbol);
-		hash1 = hash(subs->stptr, subs->stlen,
-				(unsigned long) symbol->array_size, & code);
-	} else {
-		hash1 = hash(subs->stptr, subs->stlen,
-				(unsigned long) symbol->array_size, & code);
-		bucket = assoc_find(symbol, subs, hash1, NULL);
-		if (bucket != NULL)
-			return &(bucket->ahvalue);
-	}
-
-	if (do_lint && reference) {
-		lintwarn(_("reference to uninitialized element `%s[\"%.*s\"]'"),
-		      array_vname(symbol), (int)subs->stlen, subs->stptr);
-	}
-
-	/* It's not there, install it. */
-	if (do_lint && subs->stlen == 0)
-		lintwarn(_("subscript of array `%s' is null string"),
-			array_vname(symbol));
-
-	/* first see if we would need to grow the array, before installing */
-	symbol->table_size++;
-	if ((symbol->flags & ARRAYMAXED) == 0
-	    && (symbol->table_size / symbol->array_size) > AVG_CHAIN_MAX) {
-		grow_table(symbol);
-		/* have to recompute hash value for new size */
-		hash1 = code % (unsigned long) symbol->array_size;
-	}
-
-	getnode(bucket);
-	bucket->type = Node_ahash;
-
-	/*
-	 * Freeze this string value --- it must never
-	 * change, no matter what happens to the value
-	 * that created it or to CONVFMT, etc.
-	 *
-	 * One day: Use an atom table to track array indices,
-	 * and avoid the extra memory overhead.
-	 */
-	bucket->flags |= MALLOC;
-	bucket->ahname_ref = 1;
-
-	emalloc(bucket->ahname_str, char *, subs->stlen + 2, "assoc_lookup");
-	bucket->ahname_len = subs->stlen;
-	memcpy(bucket->ahname_str, subs->stptr, subs->stlen);
-	bucket->ahname_str[bucket->ahname_len] = '\0';
-	bucket->ahvalue = Nnull_string;
- 
-	bucket->ahnext = symbol->var_array[hash1];
-	bucket->ahcode = code;
-
-	/*
-	 * Set the numeric value for the index if it's  available. Useful
-	 * for numeric sorting by index.  Do this only if the numeric
-	 * value is available, instead of all the time, since doing it
-	 * all the time is a big performance hit for something that may
-	 * never be used.
-	 */
-	if ((subs->flags & NUMCUR) != 0) {
-		bucket->ahname_num = subs->numbr;
-		bucket->flags |= NUMIND;
-	}
-
-	/* hook it into the symbol table */
-	symbol->var_array[hash1] = bucket;
-	return &(bucket->ahvalue);
+	return make_str_node(str, len);
 }
 
 
@@ -602,7 +475,7 @@ adjust_fcall_stack(NODE *symbol, int nsubs)
 	func = frame_ptr->func_node;
 	if (func == NULL)	/* in main */
 		return;
-	pcount = func->lnode->param_cnt;
+	pcount = func->param_cnt;
 	sp = frame_ptr->stack;
 
 	for (; pcount > 0; pcount--) {
@@ -627,12 +500,10 @@ adjust_fcall_stack(NODE *symbol, int nsubs)
 			 *   function f(c, d) { delete c; ..}
 			 *   BEGIN { a[0][0] = 1; f(a[0], a[0]); ...}  
 			 */
-			char *save;
-local_array:
-			save = r->vname;
-			memset(r, '\0', sizeof(NODE));
-			r->vname = save;
-			r->type = Node_var_array;
+
+			init_array(r);
+			r->parent_array = NULL;
+			r->flags = 0;
 			continue;
 		}			
 
@@ -648,8 +519,10 @@ local_array:
 				 *    BEGIN { a[0][0][0][0] = 1; f(a[0], a[0][0][0]); .. }
 				 *
 				 */
-
-				goto local_array;
+				init_array(r);
+				r->parent_array = NULL;
+				r->flags = 0;
+				break;
 			}
 		}
 	}
@@ -660,18 +533,17 @@ local_array:
 
 /*
  * `symbol' is array
- * `nsubs' is number of subscripts
+ * `nsubs' is no of subscripts
  */
 
 void
 do_delete(NODE *symbol, int nsubs)
 {
-	unsigned long hash1 = 0;
-	NODE *subs, *bucket, *last, *r;
+	NODE *val, *subs;
 	int i;
 
 	assert(symbol->type == Node_var_array);
-	subs = bucket = last = r = NULL;	/* silence the compiler */
+	subs = val = NULL;	/* silence the compiler */
 
 	/*
 	 * The force_string() call is needed to make sure that
@@ -683,16 +555,17 @@ do_delete(NODE *symbol, int nsubs)
 	 * Without it, the code does not fail.
 	 */
 
-#define free_subs(n) \
-do {								\
+#define free_subs(n)    do {                                    \
     NODE *s = PEEK(n - 1);                                      \
     if (s->type == Node_val) {                                  \
-        (void) force_string(s);	/* may have side effects ? */   \
+        (void) force_string(s);	/* may have side effects. */    \
         DEREF(s);                                               \
     }                                                           \
 } while (--n > 0)
 
-	if (nsubs == 0) {	/* delete array */
+	if (nsubs == 0) {
+		/* delete array */
+
 		adjust_fcall_stack(symbol, 0);	/* fix function call stack; See above. */
 		assoc_clear(symbol);
 		return;
@@ -706,66 +579,46 @@ do {								\
 			free_subs(i);
 			fatal(_("attempt to use array `%s' in a scalar context"), array_vname(subs));
 		}
-		(void) force_string(subs);
 
-		last = NULL;	/* shut up gcc -Wall */
-		hash1 = 0;	/* ditto */
-		bucket = NULL;	/* array may be empty */
-
-		if (symbol->var_array != NULL) {
-			hash1 = hash(subs->stptr, subs->stlen,
-					(unsigned long) symbol->array_size, NULL);
-			bucket = assoc_find(symbol, subs, hash1, &last);
-		}
-
-		if (bucket == NULL) {
-			if (do_lint)
+		val = in_array(symbol, subs);
+		if (val == NULL) {
+			if (do_lint) {
+				subs = force_string(subs);
 				lintwarn(_("delete: index `%s' not in array `%s'"),
 					subs->stptr, array_vname(symbol));
+			}
 			/* avoid memory leak, free all subs */
 			free_subs(i);
 			return;
 		}
 
 		if (i > 1) {
-			if (bucket->ahvalue->type != Node_var_array) {
+			if (val->type != Node_var_array) {
 				/* e.g.: a[1] = 1; delete a[1][1] */
+
 				free_subs(i);
+				subs = force_string(subs);
 				fatal(_("attempt to use scalar `%s[\"%.*s\"]' as an array"),
 					array_vname(symbol),
-					(int) bucket->ahname_len,
-					bucket->ahname_str);
+					(int) subs->stlen,
+					subs->stptr);
 			}
-			symbol = bucket->ahvalue;
+			symbol = val;
+			DEREF(subs);
 		}
-		DEREF(subs);
 	}
 
-	r = bucket->ahvalue;
-	if (r->type == Node_var_array) {
-		adjust_fcall_stack(r, nsubs);	/* fix function call stack; See above. */
-		assoc_clear(r);
+	if (val->type == Node_var_array) {
+		adjust_fcall_stack(val, nsubs);  /* fix function call stack; See above. */
+		assoc_clear(val);
 		/* cleared a sub-array, free Node_var_array */
-		efree(r->vname);
-		freenode(r);
+		efree(val->vname);
+		freenode(val);
 	} else
-		unref(r);
+		unref(val);
 
-	if (last != NULL)
-		last->ahnext = bucket->ahnext;
-	else
-		symbol->var_array[hash1] = bucket->ahnext;
-
-	unref(bucket);	/* unref() will free the ahname_str */
-	symbol->table_size--;
-	if (symbol->table_size <= 0) {
-		symbol->table_size = symbol->array_size = 0;
-		symbol->flags &= ~ARRAYMAXED;
-		if (symbol->var_array != NULL) {
-			efree(symbol->var_array);
-			symbol->var_array = NULL;
-		}
-	}
+	(void) assoc_remove(symbol, subs);
+	DEREF(subs);
 
 #undef free_subs
 }
@@ -782,272 +635,159 @@ do {								\
 void
 do_delete_loop(NODE *symbol, NODE **lhs)
 {
-	long i;
-
-	assert(symbol->type == Node_var_array);
+	NODE **list;
+	NODE fl;
 
-	if (symbol->var_array == NULL)
+	if (array_empty(symbol))
 		return;
 
-	/* get first index value */
-	for (i = 0; i < symbol->array_size; i++) {
-		if (symbol->var_array[i] != NULL) {
-			unref(*lhs);
-			*lhs = make_string(symbol->var_array[i]->ahname_str,
-					symbol->var_array[i]->ahname_len);
-			break;
-		}
-	}
+	fl.flags = AINDEX|ADELETE;	/* need a single index */
+	list = symbol->alist(symbol, & fl);
+	assert(list != NULL);
+
+	unref(*lhs);
+	*lhs = list[0];
+	efree(list);
 
 	/* blast the array in one shot */
-	adjust_fcall_stack(symbol, 0);
+	adjust_fcall_stack(symbol, 0);	
 	assoc_clear(symbol);
 }
 
-/* grow_table --- grow a hash table */
+
+/* value_info --- print scalar node info */
+
+int PREC_NUM = -1;
+int PREC_STR = -1;
 
 static void
-grow_table(NODE *symbol)
+value_info(NODE *n)
 {
-	NODE **old, **new, *chain, *next;
-	int i, j;
-	unsigned long hash1;
-	unsigned long oldsize, newsize, k;
-	/*
-	 * This is an array of primes. We grow the table by an order of
-	 * magnitude each time (not just doubling) so that growing is a
-	 * rare operation. We expect, on average, that it won't happen
-	 * more than twice.  When things are very large (> 8K), we just
-	 * double more or less, instead of just jumping from 8K to 64K.
-	 */
-	static const long sizes[] = {
-		13, 127, 1021, 8191, 16381, 32749, 65497, 131101, 262147,
-		524309, 1048583, 2097169, 4194319, 8388617, 16777259, 33554467, 
-		67108879, 134217757, 268435459, 536870923, 1073741827
-	};
-
-	/* find next biggest hash size */
-	newsize = oldsize = symbol->array_size;
-	for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) {
-		if (oldsize < sizes[i]) {
-			newsize = sizes[i];
-			break;
-		}
-	}
-
-	if (newsize == oldsize) {	/* table already at max (!) */
-		symbol->flags |= ARRAYMAXED;
+	if (n == Nnull_string || n == Null_field) {
+		fprintf(output_fp, "<(null)>");
 		return;
 	}
 
-	/* allocate new table */
-	emalloc(new, NODE **, newsize * sizeof(NODE *), "grow_table");
-	memset(new, '\0', newsize * sizeof(NODE *));
+	if ((n->flags & (STRING|STRCUR)) != 0) {
+		fprintf(output_fp, "<");
+		fprintf(output_fp, "\"%.*s\"", PREC_STR, n->stptr);
+		if ((n->flags & (NUMBER|NUMCUR)) != 0)
+			fprintf(output_fp, ":%.*g", PREC_NUM, n->numbr);
+		fprintf(output_fp, ">");
+	} else
+		fprintf(output_fp, "<%.*g>", PREC_NUM, n->numbr);
 
-	/* brand new hash table, set things up and return */
-	if (symbol->var_array == NULL) {
-		symbol->table_size = 0;
-		goto done;
-	}
+	fprintf(output_fp, ":%s", flags2str(n->flags));
 
-	/* old hash table there, move stuff to new, free old */
-	old = symbol->var_array;
-	for (k = 0; k < oldsize; k++) {
-		if (old[k] == NULL)
-			continue;
-
-		for (chain = old[k]; chain != NULL; chain = next) {
-			next = chain->ahnext;
-			hash1 = chain->ahcode % newsize;
+	if ((n->flags & FIELD) == 0)
+		fprintf(output_fp, ":%ld", n->valref);
+	else
+		fprintf(output_fp, ":");
 
-			/* remove from old list, add to new */
-			chain->ahnext = new[hash1];
-			new[hash1] = chain;
-		}
+	if ((n->flags & (STRING|STRCUR)) == STRCUR) {
+		fprintf(output_fp, "][");
+		fprintf(output_fp, "stfmt=%d, ", n->stfmt);	
+		fprintf(output_fp, "CONVFMT=\"%s\"", n->stfmt <= -1 ? "%ld"
+					: fmt_list[n->stfmt]->stptr);
 	}
-	efree(old);
-
-done:
-	/*
-	 * note that symbol->table_size does not change if an old array,
-	 * and is explicitly set to 0 if a new one.
-	 */
-	symbol->var_array = new;
-	symbol->array_size = newsize;
 }
 
-/* pr_node --- print simple node info */
 
-static void
-pr_node(NODE *n)
+#ifdef ARRAYDEBUG
+
+NODE *
+do_aoption(int nargs)
 {
-	if ((n->flags & NUMBER) != 0)
-		printf("%s %g p: %p", flags2str(n->flags), n->numbr, n);
-	else
-		printf("%s %.*s p: %p", flags2str(n->flags),
-				(int) n->stlen, n->stptr, n);
+	int ret = -1;
+	NODE *opt, *val;
+	int i;
+	array_ptr *afunc;
+
+	val = POP_SCALAR();
+	opt = POP_SCALAR();
+	for (i = 0; i < num_atypes; i++) {
+		afunc = atypes[i];
+		if (afunc[NUM_AFUNCS] && (*afunc[NUM_AFUNCS])(opt, val) != NULL) {
+			ret = 0;
+			break;
+		}
+	}
+	DEREF(opt);
+	DEREF(val);
+	return make_number((AWKNUM) ret);
 }
 
+#endif
 
-static void
+void
 indent(int indent_level)
 {
-	int k;
-	for (k = 0; k < indent_level; k++)
-		putchar('\t');
+	int i;
+	for (i = 0; i < indent_level; i++)
+		fprintf(output_fp, "%s", indent_char);
 }
 
-/* assoc_dump --- dump the contents of an array */
+/* assoc_info --- print index, value info */
 
-NODE *
-assoc_dump(NODE *symbol, int indent_level)
+void
+assoc_info(NODE *subs, NODE *val, NODE *ndump, const char *aname)
 {
-	long i;
-	NODE *bucket;
+	int indent_level = ndump->alevel;
 
+	indent_level++;
 	indent(indent_level);
-	if (symbol->var_array == NULL) {
-		printf(_("%s: empty (null)\n"), symbol->vname);
-		return make_number((AWKNUM) 0);
-	}
-
-	if (symbol->table_size == 0) {
-		printf(_("%s: empty (zero)\n"), symbol->vname);
-		return make_number((AWKNUM) 0);
-	}
+	fprintf(output_fp, "I: [%s:", aname);
+	if ((subs->flags & INTIND) != 0)
+		fprintf(output_fp, "<%ld>", (long) subs->numbr);
+	else
+		value_info(subs);
+	fprintf(output_fp, "]\n");
 
-	printf(_("%s: table_size = %d, array_size = %d\n"), symbol->vname,
-			(int) symbol->table_size, (int) symbol->array_size);
-
-	for (i = 0; i < symbol->array_size; i++) {
-		for (bucket = symbol->var_array[i]; bucket != NULL;
-				bucket = bucket->ahnext) {
-			indent(indent_level);
-			printf("%s: I: [len %d <%.*s> p: %p] V: [",
-				symbol->vname,
-				(int) bucket->ahname_len,
-				(int) bucket->ahname_len,
-				bucket->ahname_str,
-				bucket->ahname_str);
-			if (bucket->ahvalue->type == Node_var_array) {
-				printf("\n");
-				assoc_dump(bucket->ahvalue, indent_level + 1);
-				indent(indent_level);
-			} else
-				pr_node(bucket->ahvalue);
-			printf("]\n");
-		}
+	indent(indent_level);
+	if (val->type == Node_val) {
+		fprintf(output_fp, "V: [scalar: ");
+		value_info(val);
+	} else {
+		fprintf(output_fp, "V: [");
+		ndump->alevel++;
+		ndump->adepth--;
+		assoc_dump(val, ndump);
+		ndump->adepth++;
+		ndump->alevel--;
+		indent(indent_level);
 	}
-
-	return make_number((AWKNUM) 0);
+	fprintf(output_fp, "]\n");
 }
 
+
 /* do_adump --- dump an array: interface to assoc_dump */
 
 NODE *
 do_adump(int nargs)
 {
-	NODE *r, *a;
+	NODE *symbol, *tmp;
+	static NODE ndump;
+	long depth = 0;
 
-	a = POP();
-	if (a->type == Node_param_list) {
-		printf(_("%s: is parameter\n"), a->vname);
-		a = GET_PARAM(a->param_cnt);
-	}
-	if (a->type == Node_array_ref) {
-		printf(_("%s: array_ref to %s\n"), a->vname,
-					a->orig_array->vname);
-		a = a->orig_array;
-	}
-	if (a->type != Node_var_array)
-		fatal(_("adump: argument not an array"));
-	r = assoc_dump(a, 0);
-	return r;
-}
-
-/*
- * The following functions implement the builtin
- * asort function.  Initial work by Alan J. Broder,
- * ajb@woti.com.
- */
-
-/* dup_table --- recursively duplicate input array "symbol" */
+	/* depth < 0, no index and value info.
+	 *       = 0, main array index and value info; does not descend into sub-arrays.
+	 *       > 0, descends into 'depth' sub-arrays, and prints index and value info.
+	 */
 
-static NODE *
-dup_table(NODE *symbol, NODE *newsymb)
-{
-	NODE **old, **new, *chain, *bucket;
-	long i;
-	unsigned long cursize;
-
-	/* find the current hash size */
-	cursize = symbol->array_size;
-
-	new = NULL;
-
-	/* input is a brand new hash table, so there's nothing to copy */
-	if (symbol->var_array == NULL)
-		newsymb->table_size = 0;
-	else {
-		/* old hash table there, dupnode stuff into a new table */
-
-		/* allocate new table */
-		emalloc(new, NODE **, cursize * sizeof(NODE *), "dup_table");
-		memset(new, '\0', cursize * sizeof(NODE *));
-
-		/* do the copying/dupnode'ing */
-		old = symbol->var_array;
-		for (i = 0; i < cursize; i++) {
-			if (old[i] != NULL) {
-				for (chain = old[i]; chain != NULL;
-						chain = chain->ahnext) {
-					/* get a node for the linked list */
-					getnode(bucket);
-					bucket->type = Node_ahash;
-					bucket->flags |= MALLOC;
-					bucket->ahname_ref = 1;
-					bucket->ahcode = chain->ahcode;
-					if ((chain->flags & NUMIND) != 0) {
-						bucket->ahname_num = chain->ahname_num;
-						bucket->flags |= NUMIND;
-					}
-
-					/*
-					 * copy the corresponding name and
-					 * value from the original input list
-					 */
-					emalloc(bucket->ahname_str, char *, chain->ahname_len + 2, "dup_table");
-					bucket->ahname_len = chain->ahname_len;
-
-					memcpy(bucket->ahname_str, chain->ahname_str, chain->ahname_len);
-					bucket->ahname_str[bucket->ahname_len] = '\0';
-
-					if (chain->ahvalue->type == Node_var_array) {
-						NODE *r;
-						getnode(r);
-						r->type = Node_var_array;
-						r->vname = estrdup(chain->ahname_str, chain->ahname_len);
-						r->parent_array = newsymb;
-						bucket->ahvalue = dup_table(chain->ahvalue, r);
-					} else
-						bucket->ahvalue = dupnode(chain->ahvalue);
-
-					/*
-					 * put the node on the corresponding
-					 * linked list in the new table
-					 */
-					bucket->ahnext = new[i];
-					new[i] = bucket;
-				}
-			}
-		}
-		newsymb->table_size = symbol->table_size;
+	if (nargs == 2) {
+		tmp = POP_SCALAR();
+		depth = (long) force_number(tmp);
+		DEREF(tmp);
 	}
-
-	newsymb->var_array = new;
-	newsymb->array_size = cursize;
-	newsymb->flags = symbol->flags;	/* ARRAYMAXED */
-	return newsymb;
+	symbol = POP_PARAM();
+	if (symbol->type != Node_var_array)
+		fatal(_("adump: first argument not an array"));
+
+	ndump.type = Node_dump_array;
+	ndump.adepth = depth;
+	ndump.alevel = 0;
+	assoc_dump(symbol, & ndump);
+	return make_number((AWKNUM) 0);
 }
 
 
@@ -1059,15 +799,13 @@ asort_actual(int nargs, SORT_CTXT ctxt)
 	NODE *array, *dest = NULL, *result;
 	NODE *r, *subs, *s;
 	NODE **list, **ptr;
-#define TSIZE	100	/* an arbitrary amount */
-	static char buf[TSIZE+2];
 	unsigned long num_elems, i;
 	const char *sort_str;
 
 	if (nargs == 3)  /* 3rd optional arg */
 		s = POP_STRING();
 	else
-		s = Nnull_string;	/* "" => default sorting */
+		s = dupnode(Nnull_string);	/* "" => default sorting */
 
 	s = force_string(s);
 	sort_str = s->stptr;
@@ -1078,7 +816,6 @@ asort_actual(int nargs, SORT_CTXT ctxt)
 			sort_str = "@ind_str_asc";
 	}
 
-
 	if (nargs >= 2) {  /* 2nd optional arg */
 		dest = POP_PARAM();
 		if (dest->type != Node_var_array) {
@@ -1107,15 +844,16 @@ asort_actual(int nargs, SORT_CTXT ctxt)
 				fatal(ctxt == ASORT ?
 					_("asort: cannot use a subarray of second arg for first arg") :
 					_("asorti: cannot use a subarray of second arg for first arg"));
-                }
+		}
 	}
 
-	num_elems = array->table_size;
-	if (num_elems == 0 || array->var_array == NULL) {	/* source array is empty */
+	if (array_empty(array)) {
+		/* source array is empty */
 		if (dest != NULL && dest != array)
 			assoc_clear(dest);
 		return make_number((AWKNUM) 0);
 	}
+	num_elems = array->table_size;
 
 	/* sorting happens inside assoc_list */
 	list = assoc_list(array, sort_str, ctxt);
@@ -1132,70 +870,52 @@ asort_actual(int nargs, SORT_CTXT ctxt)
 		result = dest;
 	} else {
 		/* use 'result' as a temporary destination array */
-		getnode(result);
-		memset(result, '\0', sizeof(NODE));
-		result->type = Node_var_array;
+		result = make_array();
 		result->vname = array->vname;
 		result->parent_array = array->parent_array;
 	}
 
-	subs = make_str_node(buf, TSIZE, ALREADY_MALLOCED);   /* fake it */
-	subs->flags &= ~MALLOC;		/* safety */
-	for (i = 1, ptr = list; i <= num_elems; i++) {
-		sprintf(buf, "%lu", i);
-		subs->stlen = strlen(buf);
-		/* make number valid in case this array gets sorted later */
-		subs->numbr = i;
-		subs->flags |= NUMCUR;
-		r = *ptr++;
-		if (ctxt == ASORTI) {
-			/*
-			 * We want the indices of the source array as values
-			 * of the 'result' array.
-			 */
-			*assoc_lookup(result, subs, FALSE) =
-					make_string(r->ahname_str, r->ahname_len);
-		} else {
-			NODE *val;
-
-			/* We want the values of the source array. */
-
-			val = r->ahvalue;
-			if (result != dest) {
-				/* optimization for dest = NULL or dest = array */
-
-				if (val->type == Node_var_array) {
-					/* update subarray index in parent array */
-					efree(val->vname);
-					val->vname = estrdup(subs->stptr, subs->stlen);
-				} 
-				*assoc_lookup(result, subs, FALSE) = val;
-				r->ahvalue = Nnull_string;
-			} else {
-				if (val->type == Node_val)
-					*assoc_lookup(result, subs, FALSE) = dupnode(val);
-				else {
-					NODE *arr;
-
-					/*
-					 * There isn't any reference counting for
-					 * subarrays, so recursively copy subarrays
-					 * using dup_table().
-					 */
-					getnode(arr);
-					arr->type = Node_var_array;
-					arr->var_array = NULL;
-					arr->vname = estrdup(subs->stptr, subs->stlen);
-					arr->parent_array = array; /* actual parent, not the temporary one. */
-					*assoc_lookup(result, subs, FALSE) = dup_table(val, arr);
-				}
+	subs = make_number((AWKNUM) 0.0);
+
+	if (ctxt == ASORTI) {
+		/* We want the indices of the source array. */
+
+		for (i = 1, ptr = list; i <= num_elems; i++, ptr += 2) {
+			subs->numbr = (AWKNUM) i;
+			r = *ptr;
+			*assoc_lookup(result, subs) = r;
+		}
+	} else {
+		/* We want the values of the source array. */
+
+		for (i = 1, ptr = list; i <= num_elems; i++) {
+			subs->numbr = (AWKNUM) i;
+
+			/* free index node */
+			r = *ptr++;
+			unref(r);
+
+			/* value node */
+			r = *ptr++;
+
+			/* FIXME: asort(a) optimization */
+
+			if (r->type == Node_val)
+				*assoc_lookup(result, subs) = dupnode(r);
+			else {
+				NODE *arr;
+				arr = make_array();
+				subs = force_string(subs);
+				arr->vname = subs->stptr;
+				subs->stptr = NULL;
+				subs->flags &= ~STRCUR;
+				arr->parent_array = array; /* actual parent, not the temporary one. */
+				*assoc_lookup(result, subs) = assoc_copy(r, arr);
 			}
 		}
+	}
 
-		unref(r);
- 	}
-
-	freenode(subs);	/* stptr(buf) not malloc-ed */
+	unref(subs);
 	efree(list);
 
 	if (result != dest) {
@@ -1209,7 +929,6 @@ asort_actual(int nargs, SORT_CTXT ctxt)
 
 	return make_number((AWKNUM) num_elems);
 }
-#undef TSIZE
 
 /* do_asort --- sort array by value */
 
@@ -1227,6 +946,7 @@ do_asorti(int nargs)
 	return asort_actual(nargs, ASORTI);
 }
 
+
 /*
  * cmp_string --- compare two strings; logic similar to cmp_nodes() in eval.c
  *	except the extra case-sensitive comparison when the case-insensitive
@@ -1241,18 +961,10 @@ cmp_string(const NODE *n1, const NODE *n2)
 	int ret;
 	size_t lmin;
 
-	assert(n1->type == n2->type);
-	if (n1->type == Node_ahash) {
-		s1 = n1->ahname_str;
-		len1 = n1->ahname_len;
-		s2 =  n2->ahname_str;
-		len2 = n2->ahname_len;
-	} else {
-		s1 = n1->stptr;
-		len1 = n1->stlen;
-		s2 =  n2->stptr;
-		len2 = n2->stlen;
-	}
+	s1 = n1->stptr;
+	len1 = n1->stlen;
+	s2 =  n2->stptr;
+	len2 = n2->stlen;
 
 	if (len1 == 0)
 		return len2 == 0 ? 0 : -1;
@@ -1303,7 +1015,7 @@ sort_up_index_string(const void *p1, const void *p2)
 }
 
 
-/* sort_down_index_string --- descending index strings */
+/* sort_down_index_str --- qsort comparison function; descending index strings. */
 
 static int
 sort_down_index_string(const void *p1, const void *p2)
@@ -1326,24 +1038,23 @@ sort_down_index_string(const void *p1, const void *p2)
 static int
 sort_up_index_number(const void *p1, const void *p2)
 {
-	const NODE *n1, *n2;
+	const NODE *t1, *t2;
 	int ret;
 
-	n1 = *((const NODE *const *) p1);
-	n2 = *((const NODE *const *) p2);
+	t1 = *((const NODE *const *) p1);
+	t2 = *((const NODE *const *) p2);
 
-	if (n1->ahname_num < n2->ahname_num)
+	if (t1->numbr < t2->numbr)
 		ret = -1;
 	else
-		ret = (n1->ahname_num > n2->ahname_num);
+		ret = (t1->numbr > t2->numbr);
 
 	/* break a tie with the index string itself */
 	if (ret == 0)
-		return cmp_string(n1, n2);
+		return cmp_string(t1, t2);
 	return ret;
 }
 
-
 /* sort_down_index_number --- qsort comparison function; descending index numbers */
 
 static int
@@ -1359,29 +1070,23 @@ static int
 sort_up_value_string(const void *p1, const void *p2)
 {
 	const NODE *t1, *t2;
-	NODE *n1, *n2;
-
-	/* we're passed a pair of index (array subscript) nodes */
-	t1 = *(const NODE *const *) p1;
-	t2 = *(const NODE *const *) p2;
 
-	/* and we want to compare the element values they refer to */
-	n1 = t1->ahvalue;
-	n2 = t2->ahvalue;
+	t1 = *((const NODE *const *) p1 + 1);
+	t2 = *((const NODE *const *) p2 + 1);
 
-	if (n1->type == Node_var_array) {
-		/* return 0 if n2 is a sub-array too, else return 1 */
-		return (n2->type != Node_var_array);
+	if (t1->type == Node_var_array) {
+		/* return 0 if t2 is a sub-array too, else return 1 */
+		return (t2->type != Node_var_array);
 	}
-	if (n2->type == Node_var_array)
-		return -1;		/* n1 (scalar) < n2 (sub-array) */
+	if (t2->type == Node_var_array)
+		return -1;		/* t1 (scalar) < t2 (sub-array) */
 
-	/* n1 and n2 both have string values; See sort_force_value_string(). */
-	return cmp_string(n1, n2);
+	/* t1 and t2 both have string values */
+	return cmp_string(t1, t2);
 }
 
 
-/* sort_down_value_string --- descending value string */
+/* sort_down_value_string --- qsort comparison function; descending value string */
 
 static int
 sort_down_value_string(const void *p1, const void *p2)
@@ -1389,50 +1094,46 @@ sort_down_value_string(const void *p1, const void *p2)
 	return -sort_up_value_string(p1, p2);
 }
 
+
 /* sort_up_value_number --- qsort comparison function; ascending value number */
 
 static int
 sort_up_value_number(const void *p1, const void *p2)
 {
-	const NODE *t1, *t2;
-	NODE *n1, *n2;
+	NODE *t1, *t2;
 	int ret;
 
-	/* we're passed a pair of index (array subscript) nodes */
-	t1 = *(const NODE *const *) p1;
-	t2 = *(const NODE *const *) p2;
-
-	/* and we want to compare the element values they refer to */
-	n1 = t1->ahvalue;
-	n2 = t2->ahvalue;
+	t1 = *((NODE *const *) p1 + 1);
+	t2 = *((NODE *const *) p2 + 1);
 
-	if (n1->type == Node_var_array) {
-		/* return 0 if n2 is a sub-array too, else return 1 */
-		return (n2->type != Node_var_array);
+	if (t1->type == Node_var_array) {
+		/* return 0 if t2 is a sub-array too, else return 1 */
+		return (t2->type != Node_var_array);
 	}
-	if (n2->type == Node_var_array)
-		return -1;		/* n1 (scalar) < n2 (sub-array) */
+	if (t2->type == Node_var_array)
+		return -1;		/* t1 (scalar) < t2 (sub-array) */
 
-	/* n1 and n2 both Node_val, and force_number'ed */
-	if (n1->numbr < n2->numbr)
+	/* t1 and t2 both Node_val, and force_number'ed */
+	if (t1->numbr < t2->numbr)
 		ret = -1;
 	else
-		ret = (n1->numbr > n2->numbr);
+		ret = (t1->numbr > t2->numbr);
 
 	if (ret == 0) {
 		/*
 		 * Use string value to guarantee same sort order on all
 		 * versions of qsort().
 		 */
-		n1 = force_string(n1);
-		n2 = force_string(n2);
-		ret = cmp_string(n1, n2);
+		t1 = force_string(t1);
+		t2 = force_string(t2);
+		ret = cmp_string(t1, t2);
 	}
 
 	return ret;
 }
 
-/* sort_down_value_number --- descending value number */
+
+/* sort_down_value_number --- qsort comparison function; descending value number */
 
 static int
 sort_down_value_number(const void *p1, const void *p2)
@@ -1440,21 +1141,17 @@ sort_down_value_number(const void *p1, const void *p2)
 	return -sort_up_value_number(p1, p2);
 }
 
+
 /* sort_up_value_type --- qsort comparison function; ascending value type */
 
 static int
 sort_up_value_type(const void *p1, const void *p2)
 {
-	const NODE *t1, *t2;
 	NODE *n1, *n2;
 
-	/* we're passed a pair of index (array subscript) nodes */
-	t1 = *(const NODE *const *) p1;
-	t2 = *(const NODE *const *) p2;
-
-	/* and we want to compare the element values they refer to */
-	n1 = t1->ahvalue;
-	n2 = t2->ahvalue;
+	/* we want to compare the element values */
+	n1 = *((NODE *const *) p1 + 1);
+	n2 = *((NODE *const *) p2 + 1);
 
 	/* 1. Arrays vs. scalar, scalar is less than array */
 	if (n1->type == Node_var_array) {
@@ -1472,6 +1169,12 @@ sort_up_value_type(const void *p1, const void *p2)
 	if ((n2->flags & MAYBE_NUM) != 0)
 		(void) force_number(n2);
 
+	/* 2.5. Resolve INTIND, so that is STRING, and not NUMBER */
+	if ((n1->flags & INTIND) != 0)
+		(void) force_string(n1);
+	if ((n2->flags & INTIND) != 0)
+		(void) force_string(n2);
+
 	if ((n1->flags & NUMBER) != 0 && (n2->flags & NUMBER) != 0) {
 		if (n1->numbr < n2->numbr)
 			return -1;
@@ -1492,7 +1195,7 @@ sort_up_value_type(const void *p1, const void *p2)
 	return cmp_string(n1, n2);
 }
 
-/* sort_down_value_type --- descending value type */
+/* sort_down_value_type --- qsort comparison function; descending value type */
 
 static int
 sort_down_value_type(const void *p1, const void *p2)
@@ -1505,27 +1208,26 @@ sort_down_value_type(const void *p1, const void *p2)
 static int
 sort_user_func(const void *p1, const void *p2)
 {
-	const NODE *t1, *t2;
 	NODE *idx1, *idx2, *val1, *val2;
 	AWKNUM ret;
 	INSTRUCTION *code;
 	extern int exiting;
 
-	t1 = *((const NODE *const *) p1);
-	t2 = *((const NODE *const *) p2);
-
-	idx1 = make_string(t1->ahname_str, t1->ahname_len);
-	idx2 = make_string(t2->ahname_str, t2->ahname_len);
-	val1 = t1->ahvalue;
-	val2 = t2->ahvalue;
+	idx1 = *((NODE *const *) p1);
+	idx2 = *((NODE *const *) p2);
+	val1 = *((NODE *const *) p1 + 1);
+	val2 = *((NODE *const *) p2 + 1);
 
 	code = TOP()->code_ptr;	/* comparison function call instructions */
 
 	/* setup 4 arguments to comp_func() */
+	UPREF(idx1);
 	PUSH(idx1);
 	if (val1->type == Node_val)
 		UPREF(val1);
 	PUSH(val1);
+
+	UPREF(idx2);
 	PUSH(idx2);
 	if (val2->type == Node_val)
 		UPREF(val2);
@@ -1543,113 +1245,73 @@ sort_user_func(const void *p1, const void *p2)
 	return (ret < 0.0) ? -1 : (ret > 0.0);
 }
 
-/* sort_force_index_number -- pre-process list items for sorting indices as numbers */
-
-static void
-sort_force_index_number(NODE **list, size_t num_elems)
-{
-	size_t i;
-	NODE *r;
-	static NODE temp_node;
-
-	for (i = 0; i < num_elems; i++) {
-		r = list[i];
-
-		if ((r->flags & NUMIND) != 0)	/* once in a lifetime is plenty */
-			continue;
-		temp_node.type = Node_val;
-		temp_node.stptr = r->ahname_str;
-		temp_node.stlen = r->ahname_len;
-		temp_node.flags = 0;	/* only interested in the return value of r_force_number */
-		r->ahname_num = r_force_number(& temp_node);
-		r->flags |= NUMIND;
-	}
-}
-
-/* sort_force_value_number -- pre-process list items for sorting values as numbers */
-
-static void
-sort_force_value_number(NODE **list, size_t num_elems)
-{
-	size_t i;
-	NODE *r, *val;
-
-	for (i = 0; i < num_elems; i++) {
-		r = list[i];
-		val = r->ahvalue;
-		if (val->type == Node_val)
-			(void) force_number(val);
-	}
-}
-
-/* sort_force_value_string -- pre-process list items for sorting values as strings */
-
-static void
-sort_force_value_string(NODE **list, size_t num_elems)
-{
-	size_t i;
-	NODE *r, *val;
-
-	for (i = 0; i < num_elems; i++) {
-		r = list[i];
-		val = r->ahvalue;
-		if (val->type == Node_val)
-			r->ahvalue = force_string(val);
-	}
-}
 
 /* assoc_list -- construct, and optionally sort, a list of array elements */  
 
 NODE **
-assoc_list(NODE *array, const char *sort_str, SORT_CTXT sort_ctxt)
+assoc_list(NODE *symbol, const char *sort_str, SORT_CTXT sort_ctxt)
 {
-	typedef void (*qsort_prefunc)(NODE **, size_t);
 	typedef int (*qsort_compfunc)(const void *, const void *);
 
 	static const struct qsort_funcs {
 		const char *name;
 		qsort_compfunc comp_func;
-		qsort_prefunc pre_func;		/* pre-processing of list items */
+		enum assoc_list_flags flags;
 	} sort_funcs[] = {
-		{ "@ind_str_asc",	sort_up_index_string,	0 },
-		{ "@ind_num_asc",	sort_up_index_number,	sort_force_index_number },
-		{ "@val_str_asc",	sort_up_value_string,	sort_force_value_string },
-		{ "@val_num_asc",	sort_up_value_number,	sort_force_value_number	},
-		{ "@ind_str_desc",	sort_down_index_string,	0 },
-		{ "@ind_num_desc",	sort_down_index_number,	sort_force_index_number },
-		{ "@val_str_desc",	sort_down_value_string,	sort_force_value_string },
-		{ "@val_num_desc",	sort_down_value_number,	sort_force_value_number },
-		{ "@val_type_asc",	sort_up_value_type,	0 },
-		{ "@val_type_desc",	sort_down_value_type,	0 },
-		{ "@unsorted",		0,	0 },
-	};
+{ "@ind_str_asc",	sort_up_index_string,	AINDEX|AISTR|AASC },
+{ "@ind_num_asc",	sort_up_index_number,	AINDEX|AINUM|AASC },
+{ "@val_str_asc",	sort_up_value_string,	AVALUE|AVSTR|AASC },
+{ "@val_num_asc",	sort_up_value_number,	AVALUE|AVNUM|AASC },
+{ "@ind_str_desc",	sort_down_index_string,	AINDEX|AISTR|ADESC },
+{ "@ind_num_desc",	sort_down_index_number,	AINDEX|AINUM|ADESC },
+{ "@val_str_desc",	sort_down_value_string,	AVALUE|AVSTR|ADESC },
+{ "@val_num_desc",	sort_down_value_number,	AVALUE|AVNUM|ADESC },
+{ "@val_type_asc",	sort_up_value_type,	AVALUE|AASC },
+{ "@val_type_desc",	sort_down_value_type,	AVALUE|ADESC },
+{ "@unsorted",		0,			AINDEX },
+};
+
+	/* N.B.: AASC and ADESC are hints to the specific array types.
+	 *	See cint_list() in cint_array.c.
+	 */
+
 	NODE **list;
-	NODE *r;
-	size_t num_elems, i, j;
+	NODE fl;
+	unsigned long num_elems, j;
+	int elem_size, qi;
 	qsort_compfunc cmp_func = 0;
-	qsort_prefunc pre_func = 0;
 	INSTRUCTION *code = NULL;
-	int qi;
 	extern int currule;
 	
-	num_elems = array->table_size;
+	num_elems = symbol->table_size;
 	assert(num_elems > 0);
 
+	elem_size = 1;
+	fl.flags = 0;
+
 	for (qi = 0, j = sizeof(sort_funcs)/sizeof(sort_funcs[0]); qi < j; qi++) {
 		if (strcmp(sort_funcs[qi].name, sort_str) == 0)
 			break;
 	}
 
-	if (qi >= 0 && qi < j) {
+	if (qi < j) {
 		cmp_func = sort_funcs[qi].comp_func;
-		pre_func = sort_funcs[qi].pre_func;
+		fl.flags = sort_funcs[qi].flags;
+
+		if (symbol->array_funcs != cint_array_func)
+			fl.flags &= ~(AASC|ADESC);
 
-	} else {		/* unrecognized */
+		if (sort_ctxt != SORTED_IN || (fl.flags & AVALUE) != 0) {
+			/* need index and value pair in the list */
+
+			fl.flags |= (AINDEX|AVALUE);
+			elem_size = 2;
+		}
+
+	} else {	/* unrecognized */
 		NODE *f;
 		const char *sp;	
 
-		assert(sort_str != NULL);
-
 		for (sp = sort_str; *sp != '\0'
 		     && ! isspace((unsigned char) *sp); sp++)
 			continue;
@@ -1663,7 +1325,10 @@ assoc_list(NODE *array, const char *sort_str, SORT_CTXT sort_ctxt)
 			fatal(_("sort comparison function `%s' is not defined"), sort_str);
 
 		cmp_func = sort_user_func;
-		/* pre_func is still NULL */
+
+		/* need index and value pair in the list */
+		fl.flags |= (AVALUE|AINDEX);
+		elem_size = 2;
 
 		/* make function call instructions */
 		code = bcalloc(Op_func_call, 2, 0);
@@ -1683,23 +1348,12 @@ assoc_list(NODE *array, const char *sort_str, SORT_CTXT sort_ctxt)
 		PUSH_CODE(code);
 	}
 
-	/* allocate space for array; the extra space is used in for(i in a) opcode (eval.c) */
-	emalloc(list, NODE **, (num_elems + 1) * sizeof(NODE *), "assoc_list");
-
-	/* populate it */
-	for (i = j = 0; i < array->array_size; i++)
-		for (r = array->var_array[i]; r != NULL; r = r->ahnext)
-			list[j++] = dupnode(r);
-	list[num_elems] = NULL;
+	list = symbol->alist(symbol, & fl);
 
-	if (! cmp_func) /* unsorted */
-		return list;
+	if (! cmp_func || (fl.flags & (AASC|ADESC)) != 0)
+		return list;	/* unsorted or list already sorted */
 
-	/* special pre-processing of list items */
-	if (pre_func)
-		pre_func(list, num_elems);
-
-	qsort(list, num_elems, sizeof(NODE *), cmp_func); /* shazzam! */
+	qsort(list, num_elems, elem_size * sizeof(NODE *), cmp_func); /* shazzam! */
 
 	if (cmp_func == sort_user_func) {
 		code = POP_CODE();
@@ -1708,72 +1362,17 @@ assoc_list(NODE *array, const char *sort_str, SORT_CTXT sort_ctxt)
 		bcfree(code);                   /* Op_func_call */
 	}
 
-	return list;
-}
-
-
-/*
-From bonzini@gnu.org  Mon Oct 28 16:05:26 2002
-Date: Mon, 28 Oct 2002 13:33:03 +0100
-From: Paolo Bonzini <bonzini@gnu.org>
-To: arnold@skeeve.com
-Subject: Hash function
-Message-ID: <20021028123303.GA6832@biancaneve>
-
-Here is the hash function I'm using in GNU Smalltalk.  The scrambling is
-needed if you use powers of two as the table sizes.  If you use primes it
-is not needed.
-
-To use double-hashing with power-of-two size, you should use the
-_gst_hash_string(str, len) as the primary hash and
-scramble(_gst_hash_string (str, len)) | 1 as the secondary hash.
-
-Paolo
-
-*/
-/*
- * ADR: Slightly modified to work w/in the context of gawk.
- */
-
-static unsigned long
-gst_hash_string(const char *str, size_t len, unsigned long hsize, size_t *code)
-{
-	unsigned long hashVal = 1497032417;    /* arbitrary value */
-	unsigned long ret;
-
-	while (len--) {
-		hashVal += *str++;
-		hashVal += (hashVal << 10);
-		hashVal ^= (hashVal >> 6);
-	}
-
-	ret = scramble(hashVal);
-
-	if (code != NULL)
-		*code = ret;
-
-	if (ret >= hsize)
-		ret %= hsize;
-
-	return ret;
-}
+	if (sort_ctxt == SORTED_IN
+		&& (fl.flags & (AINDEX|AVALUE)) == (AINDEX|AVALUE)
+	) {
+		/* relocate all index nodes to the first half of the list. */
+		for (j = 1; j < num_elems; j++)
+			list[j] = list[2 * j];
 
-static unsigned long
-scramble(unsigned long x)
-{
-	if (sizeof(long) == 4) {
-		int y = ~x;
+		/* give back extra memory */
 
-		x += (y << 10) | (y >> 22);
-		x += (x << 6)  | (x >> 26);
-		x -= (x << 16) | (x >> 16);
-	} else {
-		x ^= (~x) >> 31;
-		x += (x << 21) | (x >> 11);
-		x += (x << 5) | (x >> 27);
-		x += (x << 27) | (x >> 5);
-		x += (x << 31);
+		erealloc(list, NODE **, num_elems * sizeof(NODE *), "assoc_list");
 	}
 
-	return x;
+	return list;
 }
author	john haque <j.eh@mchsi.com>	2011-08-20 08:28:02 -0500
committer	john haque <j.eh@mchsi.com>	2011-10-12 07:33:05 -0500
commit	1fea520248b42ca995c8797698c60301ea42ffe9 (patch)
tree	1aa80c13392c25aa6bf3eb931fec9c83a0621e25 /array.c
parent	f757e147f1ae8254669b3222bc24a39ee8ff9b8f (diff)
download	gawk-1fea520248b42ca995c8797698c60301ea42ffe9.tar.gz