17 files changed, 651 insertions, 336 deletions
diff --git a/nasmlib/malloc.c b/nasmlib/alloc.c
index dbb7384a..ad2cff3d 100644
--- a/nasmlib/malloc.c
+++ b/nasmlib/alloc.c
@@ -36,13 +36,11 @@
  */
 
 #include "compiler.h"
-
-#include <stdlib.h>
-
 #include "nasmlib.h"
 #include "error.h"
+#include "alloc.h"
 
-static no_return nasm_alloc_failed(void)
+no_return nasm_alloc_failed(void)
 {
     /* If nasm_fatal() gets us back here, then croak hard */
     static bool already_here = false;
@@ -50,7 +48,7 @@ static no_return nasm_alloc_failed(void)
 
     if (likely(!already_here)) {
         already_here = true;
-        nasm_fatal(0, "out of memory!");
+        nasm_fatal("out of memory!");
     }
 
     errfile = error_file;
@@ -63,13 +61,6 @@ static no_return nasm_alloc_failed(void)
     abort();
 }
 
-static inline void *validate_ptr(void *p)
-{
-    if (unlikely(!p))
-        nasm_alloc_failed();
-    return p;
-}
-
 void *nasm_malloc(size_t size)
 {
     return validate_ptr(malloc(size));
diff --git a/nasmlib/alloc.h b/nasmlib/alloc.h
new file mode 100644
index 00000000..c599d213
--- /dev/null
+++ b/nasmlib/alloc.h
@@ -0,0 +1,48 @@
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
+ *   See the file AUTHORS included with the NASM distribution for
+ *   the specific copyright holders.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer in the documentation and/or other materials provided
+ *     with the distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+#ifndef NASMLIB_ALLOC_H
+#define NASMLIB_ALLOC_H
+
+#include "compiler.h"
+
+no_return nasm_alloc_failed(void);
+
+static inline void *validate_ptr(void *p)
+{
+    if (unlikely(!p))
+        nasm_alloc_failed();
+    return p;
+}
+
+#endif /* NASMLIB_ALLOC_H */
diff --git a/nasmlib/srcfile.c b/nasmlib/asprintf.c
index 6fbe763f..be88d491 100644
--- a/nasmlib/srcfile.c
+++ b/nasmlib/asprintf.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
  *
- *   Copyright 1996-2016 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -31,92 +31,62 @@
  *
  * ----------------------------------------------------------------------- */
 
-/*
- * srcfile.c - keep track of the current position in the input stream
- */
-
 #include "compiler.h"
-
-#include <string.h>
-#include <inttypes.h>
-
 #include "nasmlib.h"
-#include "hashtbl.h"
-
-static const char *file_name = NULL;
-static int32_t line_number = 0;
-
-static struct hash_table filename_hash;
-
-void src_init(void)
-{
-    hash_init(&filename_hash, HASH_MEDIUM);
-}
-
-void src_free(void)
-{
-    hash_free_all(&filename_hash, false);
-}
+#include "alloc.h"
 
 /*
- * Set the current filename, returning the old one.  The input
- * filename is duplicated if needed.
+ * nasm_[v]asprintf() are variants of the semi-standard [v]asprintf()
+ * functions, except that we return the pointer instead of a count.
+ * The size of the string (including the final NUL!) is available
+ * by calling nasm_aprintf_size() afterwards.
+ *
+ * nasm_[v]axprintf() are similar, but allocate a user-defined amount
+ * of storage before the string, and return a pointer to the start
+ * of the allocated buffer (the string follows the extra storage).
  */
-const char *src_set_fname(const char *newname)
-{
-    struct hash_insert hi;
-    const char *oldname;
-    void **dp;
 
-    if (newname) {
-        dp = hash_find(&filename_hash, newname, &hi);
-        if (dp) {
-            newname = (const char *)(*dp);
-        } else {
-            newname = nasm_strdup(newname);
-            hash_add(&hi, newname, (void *)newname);
-        }
-    }
+size_t _nasm_aprintf_size;
 
-    oldname = file_name;
-    file_name = newname;
-    return oldname;
-}
-
-int32_t src_set_linnum(int32_t newline)
+void *nasm_vaxprintf(size_t extra, const char *fmt, va_list ap)
 {
-    int32_t oldline = line_number;
-    line_number = newline;
-    return oldline;
+    char *strp;
+    va_list xap;
+    size_t bytes;
+
+    va_copy(xap, ap);
+    _nasm_aprintf_size = bytes = vsnprintf(NULL, 0, fmt, xap) + 1;
+    va_end(xap);
+    strp = nasm_malloc(extra+bytes);
+    vsnprintf(strp+extra, bytes, fmt, ap);
+    return strp;
 }
 
-void src_set(int32_t line, const char *fname)
+char *nasm_vasprintf(const char *fmt, va_list ap)
 {
-    src_set_fname(fname);
-    src_set_linnum(line);
+    return nasm_vaxprintf(0, fmt, ap);
 }
 
-const char *src_get_fname(void)
+void *nasm_axprintf(size_t extra, const char *fmt, ...)
 {
-    return file_name;
-}
+    va_list ap;
+    void *strp;
 
-int32_t src_get_linnum(void)
-{
-    return line_number;
+    va_start(ap, fmt);
+    strp = nasm_vaxprintf(extra, fmt, ap);
+    va_end(ap);
+
+    return strp;
 }
 
-int32_t src_get(int32_t *xline, const char **xname)
+char *nasm_asprintf(const char *fmt, ...)
 {
-    const char *xn = *xname;
-    int32_t xl = *xline;
+    va_list ap;
+    char *strp;
 
-    *xline = line_number;
-    *xname = file_name;
+    va_start(ap, fmt);
+    strp = nasm_vaxprintf(0, fmt, ap);
+    va_end(ap);
 
-    /* XXX: Is the strcmp() really needed here? */
-    if (!file_name || !xn || (xn != file_name && strcmp(xn, file_name)))
-        return -2;
-    else
-        return line_number - xl;
+    return strp;
 }
diff --git a/nasmlib/bsi.c b/nasmlib/bsi.c
index a811e45e..ae31f482 100644
--- a/nasmlib/bsi.c
+++ b/nasmlib/bsi.c
@@ -37,7 +37,6 @@
 
 #include "compiler.h"
 
-#include <string.h>
 
 #include "nasmlib.h"
 
diff --git a/nasmlib/crc64.c b/nasmlib/crc64.c
index 338e0be4..334cd307 100644
--- a/nasmlib/crc64.c
+++ b/nasmlib/crc64.c
@@ -32,7 +32,7 @@
  * ----------------------------------------------------------------------- */
 
 #include "compiler.h"
-#include "nasmlib.h"
+#include "nctype.h"
 #include "hashtbl.h"
 
 static const uint64_t crc64_tab[256] = {
@@ -187,3 +187,25 @@ uint64_t crc64i(uint64_t crc, const char *str)
 
     return crc;
 }
+
+uint64_t crc64b(uint64_t crc, const void *data, size_t len)
+{
+    const uint8_t *str = data;
+
+    while (len--) {
+	crc = crc64_tab[(uint8_t)crc ^ *str++] ^ (crc >> 8);
+    }
+
+    return crc;
+}
+
+uint64_t crc64ib(uint64_t crc, const void *data, size_t len)
+{
+    const uint8_t *str = data;
+
+    while (len--) {
+	crc = crc64_tab[(uint8_t)crc ^ nasm_tolower(*str++)] ^ (crc >> 8);
+    }
+
+    return crc;
+}
diff --git a/nasmlib/errfile.c b/nasmlib/errfile.c
index ee4bae8e..a2c5e4ff 100644
--- a/nasmlib/errfile.c
+++ b/nasmlib/errfile.c
@@ -1,5 +1,4 @@
 #include "compiler.h"
-#include <stdio.h>
 
 FILE *error_file;
 
diff --git a/nasmlib/file.c b/nasmlib/file.c
index c0b4e781..4902bb51 100644
--- a/nasmlib/file.c
+++ b/nasmlib/file.c
@@ -37,9 +37,9 @@ void nasm_read(void *ptr, size_t size, FILE *f)
 {
     size_t n = fread(ptr, 1, size, f);
     if (ferror(f)) {
-        nasm_fatal(0, "unable to read input: %s", strerror(errno));
+        nasm_fatal("unable to read input: %s", strerror(errno));
     } else if (n != size || feof(f)) {
-        nasm_fatal(0, "fatal short read on input");
+        nasm_fatal("fatal short read on input");
     }
 }
 
@@ -47,7 +47,7 @@ void nasm_write(const void *ptr, size_t size, FILE *f)
 {
     size_t n = fwrite(ptr, 1, size, f);
     if (n != size || ferror(f) || feof(f))
-        nasm_fatal(0, "unable to write output: %s", strerror(errno));
+        nasm_fatal("unable to write output: %s", strerror(errno));
 }
 
 void fwriteint16_t(uint16_t data, FILE * fp)
@@ -119,8 +119,8 @@ FILE *nasm_open_read(const char *filename, enum file_flags flags)
         f = fopen(filename, (flags & NF_TEXT) ? "rt" : "rb");
 
     if (!f && (flags & NF_FATAL))
-        nasm_fatal(ERR_NOFILE, "unable to open input file: `%s': %s",
-                   filename, strerror(errno));
+        nasm_fatalf(ERR_NOFILE, "unable to open input file: `%s': %s",
+                    filename, strerror(errno));
 
     return f;
 }
@@ -132,8 +132,8 @@ FILE *nasm_open_write(const char *filename, enum file_flags flags)
     f = fopen(filename, (flags & NF_TEXT) ? "wt" : "wb");
 
     if (!f && (flags & NF_FATAL))
-        nasm_fatal(ERR_NOFILE, "unable to open output file: `%s': %s",
-                   filename, strerror(errno));
+        nasm_fatalf(ERR_NOFILE, "unable to open output file: `%s': %s",
+                    filename, strerror(errno));
 
     return f;
 }
diff --git a/nasmlib/file.h b/nasmlib/file.h
index 4069ec64..399e3190 100644
--- a/nasmlib/file.h
+++ b/nasmlib/file.h
@@ -44,7 +44,6 @@
 # include <fcntl.h>
 #endif
 #ifdef HAVE_SYS_TYPES_H
-# include <sys/types.h>
 #endif
 #ifdef HAVE_SYS_STAT_H
 # include <sys/stat.h>
diff --git a/nasmlib/hashtbl.c b/nasmlib/hashtbl.c
index bc0776b8..3f4a957c 100644
--- a/nasmlib/hashtbl.c
+++ b/nasmlib/hashtbl.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
  *
- *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -39,14 +39,14 @@
 
 #include "compiler.h"
 
-#include <string.h>
 #include "nasm.h"
 #include "hashtbl.h"
 
-#define HASH_MAX_LOAD   2 /* Higher = more memory-efficient, slower */
+#define HASH_MAX_LOAD   2	/* Higher = more memory-efficient, slower */
+#define HASH_INIT_SIZE  16      /* Initial size (power of 2, min 4) */
 
-#define hash_calc(key)          crc64(CRC64_INIT, (key))
-#define hash_calci(key)         crc64i(CRC64_INIT, (key))
+#define hash_calc(key,keylen)   crc64b(CRC64_INIT, (key), (keylen))
+#define hash_calci(key,keylen)  crc64ib(CRC64_INIT, (key), (keylen))
 #define hash_max_load(size)     ((size) * (HASH_MAX_LOAD - 1) / HASH_MAX_LOAD)
 #define hash_expand(size)       ((size) << 1)
 #define hash_mask(size)         ((size) - 1)
@@ -54,129 +54,167 @@
 #define hash_inc(hash, mask)    ((((hash) >> 32) & (mask)) | 1) /* always odd */
 #define hash_pos_next(pos, inc, mask) (((pos) + (inc)) & (mask))
 
-static struct hash_tbl_node *alloc_table(size_t newsize)
+static void hash_init(struct hash_table *head)
 {
-    size_t bytes = newsize * sizeof(struct hash_tbl_node);
-    return nasm_zalloc(bytes);
-}
-
-void hash_init(struct hash_table *head, size_t size)
-{
-    nasm_assert(is_power2(size));
-    head->table    = alloc_table(size);
+    head->size     = HASH_INIT_SIZE;
     head->load     = 0;
-    head->size     = size;
-    head->max_load = hash_max_load(size);
+    head->max_load = hash_max_load(head->size);
+    nasm_newn(head->table, head->size);
 }
 
 /*
- * Find an entry in a hash table.
+ * Find an entry in a hash table.  The key can be any binary object.
  *
  * On failure, if "insert" is non-NULL, store data in that structure
  * which can be used to insert that node using hash_add().
- *
- * WARNING: this data is only valid until the very next call of
- * hash_add(); it cannot be "saved" to a later date.
+ * See hash_add() for constraints on the uses of the insert object.
  *
  * On success, return a pointer to the "data" element of the hash
  * structure.
  */
-void **hash_find(struct hash_table *head, const char *key,
-                 struct hash_insert *insert)
+void **hash_findb(struct hash_table *head, const void *key,
+                  size_t keylen, struct hash_insert *insert)
 {
-    struct hash_tbl_node *np;
-    struct hash_tbl_node *tbl = head->table;
-    uint64_t hash = hash_calc(key);
+    struct hash_node *np = NULL;
+    struct hash_node *tbl = head->table;
+    uint64_t hash = hash_calc(key, keylen);
     size_t mask = hash_mask(head->size);
     size_t pos = hash_pos(hash, mask);
     size_t inc = hash_inc(hash, mask);
 
-    while ((np = &tbl[pos])->key) {
-        if (hash == np->hash && !strcmp(key, np->key))
-            return &np->data;
-        pos = hash_pos_next(pos, inc, mask);
+    if (likely(tbl)) {
+        while ((np = &tbl[pos])->key) {
+            if (hash == np->hash &&
+                keylen == np->keylen &&
+                !memcmp(key, np->key, keylen))
+                return &np->data;
+            pos = hash_pos_next(pos, inc, mask);
+        }
     }
 
     /* Not found.  Store info for insert if requested. */
     if (insert) {
+        insert->node.hash = hash;
+        insert->node.key = key;
+        insert->node.keylen = keylen;
+        insert->node.data = NULL;
         insert->head  = head;
-        insert->hash  = hash;
         insert->where = np;
     }
     return NULL;
 }
 
 /*
- * Same as hash_find, but for case-insensitive hashing.
+ * Same as hash_findb(), but for a C string; the key includes the final NUL.
  */
-void **hash_findi(struct hash_table *head, const char *key,
-                  struct hash_insert *insert)
+void **hash_find(struct hash_table *head, const char *key,
+                 struct hash_insert *insert)
+{
+    return hash_findb(head, key, strlen(key)+1, insert);
+}
+
+/*
+ * Same as hash_findb(), but for case-insensitive hashing.
+ */
+void **hash_findib(struct hash_table *head, const void *key, size_t keylen,
+                   struct hash_insert *insert)
 {
-    struct hash_tbl_node *np;
-    struct hash_tbl_node *tbl = head->table;
-    uint64_t hash = hash_calci(key);
+    struct hash_node *np = NULL;
+    struct hash_node *tbl = head->table;
+    uint64_t hash = hash_calci(key, keylen);
     size_t mask = hash_mask(head->size);
     size_t pos = hash_pos(hash, mask);
     size_t inc = hash_inc(hash, mask);
 
-    while ((np = &tbl[pos])->key) {
-        if (hash == np->hash && !nasm_stricmp(key, np->key))
-            return &np->data;
-        pos = hash_pos_next(pos, inc, mask);
+    if (likely(tbl)) {
+        while ((np = &tbl[pos])->key) {
+            if (hash == np->hash &&
+                keylen == np->keylen &&
+                !nasm_memicmp(key, np->key, keylen))
+                return &np->data;
+            pos = hash_pos_next(pos, inc, mask);
+        }
     }
 
     /* Not found.  Store info for insert if requested. */
     if (insert) {
+        insert->node.hash = hash;
+        insert->node.key = key;
+        insert->node.keylen = keylen;
+        insert->node.data = NULL;
         insert->head  = head;
-        insert->hash  = hash;
         insert->where = np;
     }
     return NULL;
 }
 
 /*
+ * Same as hash_find(), but for case-insensitive hashing.
+ */
+void **hash_findi(struct hash_table *head, const char *key,
+                  struct hash_insert *insert)
+{
+    return hash_findib(head, key, strlen(key)+1, insert);
+}
+
+/*
  * Insert node.  Return a pointer to the "data" element of the newly
  * created hash node.
+ *
+ * The following constraints apply:
+ * 1. A call to hash_add() invalidates all other outstanding hash_insert
+ *    objects; attempting to use them causes a wild pointer reference.
+ * 2. The key provided must exactly match the key passed to hash_find*(),
+ *    but it does not have to point to the same storage address. The key
+ *    buffer provided to this function must not be freed for the lifespan
+ *    of the hash. Passing NULL as the key reuses the pointer that
+ *    was passed to hash_find*().
  */
-void **hash_add(struct hash_insert *insert, const char *key, void *data)
+void **hash_add(struct hash_insert *insert, const void *key, void *data)
 {
     struct hash_table *head  = insert->head;
-    struct hash_tbl_node *np = insert->where;
+    struct hash_node *np = insert->where;
+
+    if (unlikely(!np)) {
+        hash_init(head);
+        /* The hash table is empty, so we don't need to iterate here */
+        np = &head->table[hash_pos(insert->node.hash, hash_mask(head->size))];
+    }
 
     /*
      * Insert node.  We can always do this, even if we need to
      * rebalance immediately after.
      */
-    np->hash = insert->hash;
-    np->key  = key;
+    *np = insert->node;
     np->data = data;
+    if (key)
+        np->key = key;
 
-    if (++head->load > head->max_load) {
+    if (unlikely(++head->load > head->max_load)) {
         /* Need to expand the table */
-        size_t newsize                  = hash_expand(head->size);
-        struct hash_tbl_node *newtbl    = alloc_table(newsize);
-        size_t mask                     = hash_mask(newsize);
+        size_t newsize           = hash_expand(head->size);
+        struct hash_node *newtbl;
+        size_t mask              = hash_mask(newsize);
+        struct hash_node *op, *xp;
+        size_t i;
 
-        if (head->table) {
-            struct hash_tbl_node *op, *xp;
-            size_t i;
+        nasm_newn(newtbl, newsize);
 
-            /* Rebalance all the entries */
-            for (i = 0, op = head->table; i < head->size; i++, op++) {
-                if (op->key) {
-                    size_t pos = hash_pos(op->hash, mask);
-                    size_t inc = hash_inc(op->hash, mask);
+        /* Rebalance all the entries */
+        for (i = 0, op = head->table; i < head->size; i++, op++) {
+            if (op->key) {
+                size_t pos = hash_pos(op->hash, mask);
+                size_t inc = hash_inc(op->hash, mask);
 
-                    while ((xp = &newtbl[pos])->key)
-                        pos = hash_pos_next(pos, inc, mask);
+                while ((xp = &newtbl[pos])->key)
+                    pos = hash_pos_next(pos, inc, mask);
 
-                    *xp = *op;
-                    if (op == np)
-                        np = xp;
-                }
+                *xp = *op;
+                if (op == np)
+                    np = xp;
             }
-            nasm_free(head->table);
         }
+        nasm_free(head->table);
 
         head->table    = newtbl;
         head->size     = newsize;
@@ -187,36 +225,29 @@ void **hash_add(struct hash_insert *insert, const char *key, void *data)
 }
 
 /*
- * Iterate over all members of a hash set.  For the first call,
- * iterator should be initialized to NULL.  Returns the data pointer,
- * or NULL on failure.
+ * Iterate over all members of a hash set. For the first call, iter
+ * should be as initialized by hash_iterator_init(). Returns a pointer
+ * to the hash_node for the current entry, or NULL once the end of the
+ * hash table has been reached.
+ *
+ * Calling hash_add() will invalidate the iterator.
  */
-void *hash_iterate(const struct hash_table *head,
-                   struct hash_tbl_node **iterator,
-                   const char **key)
+const struct hash_node *hash_iterate(struct hash_iterator *iter)
 {
-    struct hash_tbl_node *np = *iterator;
-    struct hash_tbl_node *ep = head->table + head->size;
-
-    if (!np) {
-        np = head->table;
-        if (!np)
-            return NULL;        /* Uninitialized table */
-    }
+    const struct hash_table *head = iter->head;
+    const struct hash_node *cp = iter->next;
+    const struct hash_node *ep = head->table + head->size;
 
-    while (np < ep) {
-        if (np->key) {
-            *iterator = np + 1;
-            if (key)
-                *key = np->key;
-            return np->data;
+    /* For an empty table, cp == ep == NULL */
+    while (cp < ep) {
+        if (cp->key) {
+            iter->next = cp+1;
+            return cp;
         }
-        np++;
+        cp++;
     }
 
-    *iterator = NULL;
-    if (key)
-        *key = NULL;
+    iter->next = head->table;
     return NULL;
 }
 
@@ -229,7 +260,7 @@ void *hash_iterate(const struct hash_table *head,
 void hash_free(struct hash_table *head)
 {
     void *p = head->table;
-    head->table = NULL;
+    memset(head, 0, sizeof *head);
     nasm_free(p);
 }
 
@@ -242,14 +273,13 @@ void hash_free(struct hash_table *head)
  */
 void hash_free_all(struct hash_table *head, bool free_keys)
 {
-    struct hash_tbl_node *iter = NULL;
-    const char *keyp;
-    void *d;
+    struct hash_iterator it;
+    const struct hash_node *np;
 
-    while ((d = hash_iterate(head, &iter, &keyp))) {
-        nasm_free(d);
+    hash_for_each(head, it, np) {
+        nasm_free(np->data);
         if (free_keys)
-            nasm_free((void *)keyp);
+            nasm_free((void *)np->key);
     }
 
     hash_free(head);
diff --git a/nasmlib/md5c.c b/nasmlib/md5c.c
index 9b061361..79cf4e09 100644
--- a/nasmlib/md5c.c
+++ b/nasmlib/md5c.c
@@ -16,7 +16,6 @@
  */
 
 #include "md5.h"
-#include <string.h>             /* for memcpy() */
 
 #ifdef WORDS_LITTLEENDIAN
 #define byteReverse(buf, len)	/* Nothing */
diff --git a/nasmlib/nctype.c b/nasmlib/nctype.c
new file mode 100644
index 00000000..f30f37e0
--- /dev/null
+++ b/nasmlib/nctype.c
@@ -0,0 +1,116 @@
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
+ *   See the file AUTHORS included with the NASM distribution for
+ *   the specific copyright holders.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer in the documentation and/or other materials provided
+ *     with the distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+#include "nctype.h"
+#include <ctype.h>
+
+/*
+ * Table of tolower() results.  This avoids function calls
+ * on some platforms.
+ */
+unsigned char nasm_tolower_tab[256];
+
+static void tolower_tab_init(void)
+{
+    int i;
+
+    for (i = 0; i < 256; i++)
+	nasm_tolower_tab[i] = tolower(i);
+}
+
+/*
+ * Table of character type flags; some are simply <ctype.h>,
+ * some are NASM-specific.
+ */
+
+uint16_t nasm_ctype_tab[256];
+
+#if !defined(HAVE_ISCNTRL) && !defined(iscntrl)
+# define iscntrl(x) ((x) < 32)
+#endif
+#if !defined(HAVE_ISASCII) && !defined(isascii)
+# define isascii(x) ((x) < 128)
+#endif
+
+static void ctype_tab_init(void)
+{
+    int i;
+
+    for (i = 0; i < 256; i++) {
+        enum nasm_ctype ct = 0;
+
+        if (iscntrl(i))
+            ct |= NCT_CTRL;
+
+        if (isascii(i))
+            ct |= NCT_ASCII;
+
+        if (isspace(i) && i != '\n')
+            ct |= NCT_SPACE;
+
+        if (isalpha(i)) {
+            ct |= (nasm_tolower(i) == i) ? NCT_LOWER : NCT_UPPER;
+            ct |= NCT_ID|NCT_IDSTART;
+        }
+
+        if (isdigit(i))
+            ct |= NCT_DIGIT|NCT_ID;
+
+        if (isxdigit(i))
+            ct |= NCT_HEX;
+
+        /* Non-ASCII character, but no ctype returned (e.g. Unicode) */
+        if (!ct && !ispunct(i))
+            ct |= NCT_ID|NCT_IDSTART;
+
+        nasm_ctype_tab[i] = ct;
+    }
+
+    nasm_ctype_tab['-']  |= NCT_MINUS;
+    nasm_ctype_tab['$']  |= NCT_DOLLAR|NCT_ID;
+    nasm_ctype_tab['_']  |= NCT_UNDER|NCT_ID|NCT_IDSTART;
+    nasm_ctype_tab['.']  |= NCT_ID|NCT_IDSTART;
+    nasm_ctype_tab['@']  |= NCT_ID|NCT_IDSTART;
+    nasm_ctype_tab['?']  |= NCT_ID|NCT_IDSTART;
+    nasm_ctype_tab['#']  |= NCT_ID;
+    nasm_ctype_tab['~']  |= NCT_ID;
+    nasm_ctype_tab['\''] |= NCT_QUOTE;
+    nasm_ctype_tab['\"'] |= NCT_QUOTE;
+    nasm_ctype_tab['`']  |= NCT_QUOTE;
+}
+
+void nasm_ctype_init(void)
+{
+    tolower_tab_init();
+    ctype_tab_init();
+}
diff --git a/nasmlib/raa.c b/nasmlib/raa.c
index 77329d3c..feb86970 100644
--- a/nasmlib/raa.c
+++ b/nasmlib/raa.c
@@ -33,6 +33,7 @@
 
 #include "nasmlib.h"
 #include "raa.h"
+#include "ilog2.h"
 
 /*
  * Routines to manage a dynamic random access array of int64_ts which
@@ -40,151 +41,163 @@
  * chunk.
  */
 
-#define RAA_BLKSHIFT	15      /* 2**this many longs allocated at once */
-#define RAA_BLKSIZE	(1 << RAA_BLKSHIFT)
-#define RAA_LAYERSHIFT	15      /* 2**this many _pointers_ allocated */
-#define RAA_LAYERSIZE	(1 << RAA_LAYERSHIFT)
+#define RAA_LAYERSHIFT	11      /* 2^this many items per layer */
+#define RAA_LAYERSIZE	((size_t)1 << RAA_LAYERSHIFT)
+#define RAA_LAYERMASK	(RAA_LAYERSIZE-1)
 
+typedef struct RAA RAA;
 typedef union RAA_UNION RAA_UNION;
 typedef struct RAA_LEAF RAA_LEAF;
 typedef struct RAA_BRANCH RAA_BRANCH;
 
-struct real_raa {
+union intorptr {
+    int64_t i;
+    void *p;
+};
+
+struct RAA {
+    /* Last position in this RAA */
+    raaindex endposn;
+
     /*
      * Number of layers below this one to get to the real data. 0
-     * means this structure is a leaf, holding RAA_BLKSIZE real
+     * means this structure is a leaf, holding RAA_LAYERSIZE real
      * data items; 1 and above mean it's a branch, holding
      * RAA_LAYERSIZE pointers to the next level branch or leaf
      * structures.
      */
-    int layers;
+    unsigned int layers;
 
     /*
      * Number of real data items spanned by one position in the
      * `data' array at this level. This number is 0 trivially, for
-     * a leaf (level 0): for a level 1 branch it should be
-     * RAA_BLKSHIFT, and for a level 2 branch it's
-     * RAA_LAYERSHIFT+RAA_BLKSHIFT.
+     * a leaf (level 0): for a level n branch it should be
+     * n*RAA_LAYERSHIFT.
      */
-    int shift;
+    unsigned int shift;
 
+    /*
+     * The actual data
+     */
     union RAA_UNION {
         struct RAA_LEAF {
-            union intorptr data[RAA_BLKSIZE];
+            union intorptr data[RAA_LAYERSIZE];
         } l;
         struct RAA_BRANCH {
-            struct real_raa *data[RAA_LAYERSIZE];
+            struct RAA *data[RAA_LAYERSIZE];
         } b;
     } u;
 };
 
-struct RAA {
-    struct real_raa raa;
-};
-struct RAAPTR {
-    struct real_raa raa;
-};
-
-#define LEAFSIZ (sizeof(struct real_raa)-sizeof(RAA_UNION)+sizeof(RAA_LEAF))
-#define BRANCHSIZ (sizeof(struct real_raa)-sizeof(RAA_UNION)+sizeof(RAA_BRANCH))
+#define LEAFSIZ (sizeof(RAA)-sizeof(RAA_UNION)+sizeof(RAA_LEAF))
+#define BRANCHSIZ (sizeof(RAA)-sizeof(RAA_UNION)+sizeof(RAA_BRANCH))
 
-#define LAYERSHIFT(r) ( (r)->layers==0 ? RAA_BLKSHIFT : RAA_LAYERSHIFT )
-
-static struct real_raa *raa_init_layer(int layers)
+static struct RAA *raa_init_layer(raaindex posn, unsigned int layers)
 {
-    struct real_raa *r;
-
-    if (layers == 0) {
-        r = nasm_zalloc(LEAFSIZ);
-        r->shift = 0;
-    } else {
-        r = nasm_zalloc(BRANCHSIZ);
-        r->layers = layers;
-        r->shift = (RAA_BLKSHIFT - RAA_LAYERSHIFT) + layers * RAA_LAYERSHIFT;
-    }
+    struct RAA *r;
+    raaindex posmask;
+
+    r = nasm_zalloc((layers == 0) ? LEAFSIZ : BRANCHSIZ);
+    r->shift = layers * RAA_LAYERSHIFT;
+    r->layers    = layers;
+    posmask = ((raaindex)RAA_LAYERSIZE << r->shift) - 1;
+    r->endposn   = posn | posmask;
     return r;
 }
 
-struct real_raa *real_raa_init(void)
+void raa_free(struct RAA *r)
 {
-    return raa_init_layer(0);
-}
+    if (!r)
+        return;
 
-void real_raa_free(struct real_raa *r)
-{
     if (r->layers) {
-        struct real_raa **p;
-        for (p = r->u.b.data; p - r->u.b.data < RAA_LAYERSIZE; p++)
-            if (*p)
-                real_raa_free(*p);
+        struct RAA **p = r->u.b.data;
+        size_t i;
+        for (i = 0; i < RAA_LAYERSIZE; i++)
+            raa_free(*p++);
     }
     nasm_free(r);
 }
 
-static const union intorptr *real_raa_read(struct real_raa *r, int32_t posn)
+static const union intorptr *real_raa_read(struct RAA *r, raaindex posn)
 {
-    if ((uint32_t) posn >= (UINT32_C(1) << (r->shift + LAYERSHIFT(r))))
+    nasm_assert(posn <= (~(raaindex)0 >> 1));
+
+    if (unlikely(!r || posn > r->endposn))
         return NULL;            /* Beyond the end */
-    while (r->layers > 0) {
-        int32_t l = posn >> r->shift;
-        posn &= (UINT32_C(1) << r->shift) - 1;
+
+    while (r->layers) {
+        size_t l = (posn >> r->shift) & RAA_LAYERMASK;
         r = r->u.b.data[l];
         if (!r)
             return NULL;        /* Not present */
     }
-    return &r->u.l.data[posn];
+    return &r->u.l.data[posn & RAA_LAYERMASK];
 }
 
-int64_t raa_read(struct RAA *r, int32_t pos)
+int64_t raa_read(struct RAA *r, raaindex pos)
 {
     const union intorptr *ip;
 
-    ip = real_raa_read((struct real_raa *)r, pos);
+    ip = real_raa_read(r, pos);
     return ip ? ip->i : 0;
 }
 
-void *raa_read_ptr(struct RAAPTR *r, int32_t pos)
+void *raa_read_ptr(struct RAA *r, raaindex pos)
 {
     const union intorptr *ip;
 
-    ip = real_raa_read((struct real_raa *)r, pos);
+    ip = real_raa_read(r, pos);
     return ip ? ip->p : NULL;
 }
 
 
-struct real_raa *
-real_raa_write(struct real_raa *r, int32_t posn, union intorptr value)
+static struct RAA *
+real_raa_write(struct RAA *r, raaindex posn, union intorptr value)
 {
-    struct real_raa *result;
-
-    nasm_assert(posn >= 0);
+    struct RAA *result;
 
-    while ((UINT32_C(1) << (r->shift + LAYERSHIFT(r))) <= (uint32_t) posn) {
-        /*
-         * Must add a layer.
-         */
-        struct real_raa *s;
+    nasm_assert(posn <= (~(raaindex)0 >> 1));
 
-        s = nasm_zalloc(BRANCHSIZ);
-        s->layers = r->layers + 1;
-        s->shift = LAYERSHIFT(r) + r->shift;
-        s->u.b.data[0] = r;
-        r = s;
+    if (unlikely(!r)) {
+        /* Create a new top-level RAA */
+        r = raa_init_layer(posn, ilog2_64(posn)/RAA_LAYERSHIFT);
+    } else {
+        while (unlikely(r->endposn < posn)) {
+            /* We need to add layers to an existing RAA */
+            struct RAA *s = raa_init_layer(r->endposn, r->layers + 1);
+            s->u.b.data[0] = r;
+            r = s;
+        }
     }
 
     result = r;
 
-    while (r->layers > 0) {
-        struct real_raa **s;
-        int32_t l = posn >> r->shift;
-        posn &= (UINT32_C(1) << r->shift) - 1;
+    while (r->layers) {
+        struct RAA **s;
+        size_t l = (posn >> r->shift) & RAA_LAYERMASK;
         s = &r->u.b.data[l];
-        if (!*s)
-            *s = raa_init_layer(r->layers - 1);
+        if (unlikely(!*s))
+            *s = raa_init_layer(posn, r->layers - 1);
         r = *s;
     }
-
-    r->u.l.data[posn] = value;
+    r->u.l.data[posn & RAA_LAYERMASK] = value;
 
     return result;
 }
+
+struct RAA *raa_write(struct RAA *r, raaindex posn, int64_t value)
+{
+    union intorptr ip;
+
+    ip.i = value;
+    return real_raa_write(r, posn, ip);
+}
+
+struct RAA *raa_write_ptr(struct RAA *r, raaindex posn, void *value)
+{
+    union intorptr ip;
+
+    ip.p = value;
+    return real_raa_write(r, posn, ip);
+}
diff --git a/nasmlib/readnum.c b/nasmlib/readnum.c
index 60d856e1..b491bc73 100644
--- a/nasmlib/readnum.c
+++ b/nasmlib/readnum.c
@@ -37,7 +37,7 @@
 
 #include "compiler.h"
 
-#include <ctype.h>
+#include "nctype.h"
 
 #include "nasmlib.h"
 #include "error.h"
@@ -163,10 +163,16 @@ int64_t readnum(const char *str, bool *error)
         r++;
     }
 
-    if (warn)
-        nasm_error(ERR_WARNING | ERR_PASS1 | WARN_NOV,
+    if (warn) {
+        /*!
+         *!number-overflow [on] numeric constant does not fit
+         *!    covers warnings about numeric constants which
+         *!    don't fit in 64 bits.
+         */
+        nasm_error(ERR_WARNING  | WARN_NUMBER_OVERFLOW,
 		   "numeric constant %s does not fit in 64 bits",
 		   str);
+    }
 
     return result * sign;
 }
diff --git a/nasmlib/realpath.c b/nasmlib/realpath.c
index d93dc15f..c31003e3 100644
--- a/nasmlib/realpath.c
+++ b/nasmlib/realpath.c
@@ -37,9 +37,7 @@
 
 #include "compiler.h"
 
-#include <stdlib.h>
 #include <errno.h>
-#include <limits.h>
 #ifdef HAVE_UNISTD_H
 # include <unistd.h>
 #endif
diff --git a/nasmlib/string.c b/nasmlib/string.c
index 907df32f..4ee3ecbb 100644
--- a/nasmlib/string.c
+++ b/nasmlib/string.c
@@ -36,26 +36,8 @@
  */
 
 #include "compiler.h"
-
-#include <stdlib.h>
-#include <ctype.h>
-
 #include "nasmlib.h"
-
-/*
- * Prepare a table of tolower() results.  This avoids function calls
- * on some platforms.
- */
-
-unsigned char nasm_tolower_tab[256];
-
-void tolower_init(void)
-{
-    int i;
-
-    for (i = 0; i < 256; i++)
-	nasm_tolower_tab[i] = tolower(i);
-}
+#include "nctype.h"
 
 #ifndef nasm_stricmp
 int nasm_stricmp(const char *s1, const char *s2)
diff --git a/nasmlib/strlist.c b/nasmlib/strlist.c
index cf475278..a0687cce 100644
--- a/nasmlib/strlist.c
+++ b/nasmlib/strlist.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
  *
- *   Copyright 1996-2016 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -32,69 +32,175 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * strlist.c - simple linked list of strings
+ * strlist.c - list of ordered strings, optionally made unique
  */
 
-#include "compiler.h"
+#include "strlist.h"
 
-#include <string.h>
+/*
+ * Create a string list; if uniq is set, each distinct string is stored once.
+ */
+struct strlist *strlist_alloc(bool uniq)
+{
+	struct strlist *list = nasm_zalloc(sizeof(*list));
+	list->tailp = &list->head;
+        list->uniq = uniq;
+	return list;
+}
 
-#include "strlist.h"
+/*
+ * Append a string to a string list. Return the entry pointer, which
+ * may be a pre-existing entry for a uniqizing list.
+ */
+
+static const struct strlist_entry *
+strlist_add_common(struct strlist *list, struct strlist_entry *e,
+		   struct hash_insert *hi)
+{
+        e->offset = list->size;
+        e->next = NULL;
+
+	*list->tailp = e;
+	list->tailp = &e->next;
+	list->nstr++;
+	list->size += e->size;
+
+        if (list->uniq)
+		hash_add(hi, e->str, (void *)e);
+
+	return e;
+}
+
+const struct strlist_entry *
+strlist_add(struct strlist *list, const char *str)
+{
+	struct strlist_entry *e;
+	struct hash_insert hi;
+	size_t size;
+
+	if (!list)
+		return NULL;
+
+	size = strlen(str) + 1;
+	if (list->uniq) {
+		void **dp = hash_findb(&list->hash, str, size, &hi);
+		if (dp)
+			return *dp;
+	}
+
+	/* Structure already has char[1] as EOS */
+	e = nasm_malloc(sizeof(*e) - 1 + size);
+	e->size = size;
+	memcpy(e->str, str, size);
+
+	return strlist_add_common(list, e, &hi);
+}
+
+/*
+ * printf() to a string list
+ */
+const struct strlist_entry *
+strlist_vprintf(struct strlist *list, const char *fmt, va_list ap)
+{
+	struct strlist_entry *e;
+	struct hash_insert hi;
+
+	if (!list)
+		return NULL;
+
+	e = nasm_vaxprintf(offsetin(*e, str), fmt, ap);
+	e->size = nasm_aprintf_size();
+
+	if (list->uniq) {
+		void **dp = hash_findb(&list->hash, e->str, e->size, &hi);
+		if (dp) {
+			nasm_free(e);
+			return *dp;
+		}
+	}
+
+	return strlist_add_common(list, e, &hi);
+}
 
-static inline StrList *nasm_str_to_strlist(const char *str)
+const struct strlist_entry *
+strlist_printf(struct strlist *list, const char *fmt, ...)
 {
-    size_t l = strlen(str) + 1;
-    StrList *sl = nasm_malloc(l + sizeof sl->next);
+	va_list ap;
+	const struct strlist_entry *e;
 
-    memcpy(sl->str, str, l);
-    sl->next = NULL;
+	va_start(ap, fmt);
+	e = strlist_vprintf(list, fmt, ap);
+	va_end(ap);
 
-    return sl;
+	return e;
 }
 
 /*
- * Append a string list entry to a string list if and only if it isn't
- * already there.  Return true if it was added.
+ * Free a string list and set the caller's list pointer to NULL.
  */
-bool nasm_add_to_strlist(StrList **head, StrList *entry)
+void strlist_free(struct strlist **listp)
 {
-    StrList *list;
-
-    if (!head)
-        return false;
-
-    list = *head;
-    while (list) {
-        if (!strcmp(list->str, entry->str))
-            return false;
-        head = &list->next;
-        list = list->next;
-    }
-
-    *head = entry;
-    entry->next = NULL;
-    return true;
+	struct strlist *list = *listp;
+	struct strlist_entry *e, *tmp;
+
+	if (!list)
+		return;
+
+	if (list->uniq)
+		hash_free(&list->hash);
+
+	list_for_each_safe(e, tmp, list->head)
+		nasm_free(e);
+
+	nasm_free(list);
+	*listp = NULL;
 }
 
 /*
- * Append a string to a string list if and only if it isn't
- * already there.  Return true if it was added.
+ * Search the string list for a string. Return the entry pointer if found,
+ * otherwise NULL. Only possible on a uniqizing list.
  */
-bool nasm_add_string_to_strlist(StrList **head, const char *str)
+const struct strlist_entry *
+strlist_find(const struct strlist *list, const char *str)
 {
-    StrList *list;
+	void **hf;
 
-    if (!head)
-        return false;
+        nasm_assert(list->uniq);
 
-    list = *head;
-    while (list) {
-        if (!strcmp(list->str, str))
-            return false;
-        head = &list->next;
-        list = list->next;
-    }
+	hf = hash_find((struct hash_table *)&list->hash, str, NULL);
+	return hf ? *hf : NULL;
+}
+
+/*
+ * Produce a linearized buffer containing the whole list, in order.
+ * Each string, including the last one, is terminated by the character
+ * "sep"; this is typically either 0 or '\n'. strlist_size() will give
+ * the size of the returned buffer.
+ */
+void *strlist_linearize(const struct strlist *list, char sep)
+{
+	const struct strlist_entry *sl;
+	char *buf = nasm_malloc(list->size);
+	char *p = buf;
+
+	strlist_for_each(sl, list) {
+		p = mempcpy(p, sl->str, sl->size);
+		p[-1] = sep;
+	}
+
+	return buf;
+}
+
+/*
+ * Output a string list to a file. The separator can be any string.
+ */
+void strlist_write(const struct strlist *list, const char *sep, FILE *f)
+{
+	const struct strlist_entry *sl;
+	size_t seplen = strlen(sep);
 
-    *head = nasm_str_to_strlist(str);
-    return true;
+	strlist_for_each(sl, list) {
+		fwrite(sl->str, 1, sl->size - 1, f);
+		fwrite(sep, 1, seplen, f);
+	}
 }
diff --git a/nasmlib/ver.c b/nasmlib/ver.c
index 98362e35..9f80f79c 100644
--- a/nasmlib/ver.c
+++ b/nasmlib/ver.c
@@ -44,8 +44,45 @@ const char nasm_compile_options[] = ""
     ;
 
 /* These are used by some backends. */
-const char nasm_comment[] =
+static const char __nasm_comment[] =
     "The Netwide Assembler " NASM_VER;
 
-const char nasm_signature[] =
+static const char __nasm_signature[] =
     "NASM " NASM_VER;
+
+/* Version-independent variants, so that regression tests can pass */
+static const char __nasm_comment_const[] ="The Netwide Assembler CONST";
+static const char __nasm_signature_const[] = "NASM CONST";
+
+int nasm_test_run(void)
+{
+	return getenv("NASM_TEST_RUN") ? 1 : 0;
+}
+
+const char *nasm_comment(void)
+{
+	if (!nasm_test_run())
+		return __nasm_comment;
+	return __nasm_comment_const;
+}
+
+size_t nasm_comment_len(void)
+{
+	if (!nasm_test_run())
+		return strlen(__nasm_comment);
+	return strlen(__nasm_comment_const);
+}
+
+const char *nasm_signature(void)
+{
+	if (!nasm_test_run())
+		return __nasm_signature;
+	return __nasm_signature_const;
+}
+
+size_t nasm_signature_len(void)
+{
+	if (!nasm_test_run())
+		return strlen(__nasm_signature);
+	return strlen(__nasm_signature_const);
+}