From 0d086e18124f6c6bed14a62f84d5a4265f1c7d9b Mon Sep 17 00:00:00 2001 From: neil Date: Sun, 20 May 2001 06:26:45 +0000 Subject: * Makefile.in (OBJS, LIBCPP_OBJS, LIBCPP_DEPS, cpplib.o, cpphash.o, fix-header): Update. (hashtable.o): New target. * c-common.h: Include cpplib.h. Define C_RID_CODE and struct c_common_identifier here. * c-lang.c (c_init_options): Update. Call set_identifier_size. * c-lex.c (c_lex): Update. * c-pragma.h: Update. * c-tree.h (struct lang_identifier): Contain c_common_identifier. Delete rid_code. (C_RID_CODE): Delete. * cpphash.c: Rewrite to use hashtable.c. * cpphash.h: Update include guards. (struct cpp_reader): Remove hashtab. hash_ob and buffer_ob are no longer pointers. Add hash_table and our_hashtable. (HASHSTEP, _cpp_init_hashtable, _cpp_lookup_with_hash): Delete. (_cpp_cleanup_hashtable): Rename _cpp_destroy_hashtable. (_cpp_cleanup_stacks): Rename _cpp_init_directives. * cppinit.c (cpp_create_reader): Update. * cpplex.c (cpp_ideq, parse_identifier, cpp_output_token): Update. (cpp_interpret_charconst): Eliminate warning. * cpplib.c (do_pragma, do_endif, push_conditional, cpp_push_buffer, cpp_pop_buffer): Update. (_cpp_init_stacks): Rename cpp_init_directives. (_cpp_cleanup_stacks): Remove. * cpplib.h: Update include guards. Include tree-core.h and c-rid.h. (cpp_hashnode, cpp_token, NODE_LEN, NODE_NAME, cpp_forall_identifiers, cpp_create_reader): Update. (C_RID_CODE, cpp_make_node): New. (c_common_identifier): New identifier node for C front ends. * cppmain.c (main): Update. * fix-header.c (read_scan_file): Update. * flags.h (id_clash_len): Make unsigned. * ggc.h (ggc_mark_nonnull_tree): New. * hashtable.c: New. * hashtable.h: New. * stringpool.c: Update comments and copyright. Update to use hashtable.c. * toplev.c (approx_sqrt): Move to hashtable.c. (id_clash_len): Make unsigned. * toplev.h (ident_hash): New. * tree.c (gcc_obstack_init): Move to hashtable.c. * tree.h: Include hashtable.h. (IDENTIFIER_POINTER, IDENTIFIER_LENGTH): Update. (GCC_IDENT_TO_HT_IDENT, HT_IDENT_TO_GCC_IDENT): New. (struct tree_identifier): Update. (make_identifier): New. cp: * cp-tree.h (struct lang_identifier, C_RID_YYCODE): Update. (C_RID_CODE): Remove. * lex.c (cxx_init_options): Call set_identifier_size. Update. (init_parse): Don't do it here. objc: * objc-act.c (objc_init_options): Call set_identifier_size. Update. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@42334 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/stringpool.c | 412 +++++++++++++++++-------------------------------------- 1 file changed, 125 insertions(+), 287 deletions(-) (limited to 'gcc/stringpool.c') diff --git a/gcc/stringpool.c b/gcc/stringpool.c index 0346dcfe34c..6d440056d50 100644 --- a/gcc/stringpool.c +++ b/gcc/stringpool.c @@ -1,5 +1,5 @@ /* String pool for GCC. - Copyright (C) 2000 Free Software Foundation, Inc. + Copyright (C) 2000, 2001 Free Software Foundation, Inc. This file is part of GNU CC. @@ -18,25 +18,25 @@ along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* String pool allocator. All strings allocated by ggc_alloc_string are - uniquified and stored in an obstack which is never shrunk. You can - associate a tree with a string if you wish; this is used to implement - get_identifier. +/* String text, identifer text and identifier node allocator. Strings + allocated by ggc_alloc_string are stored in an obstack which is + never shrunk. Identifiers are uniquely stored in a hash table. - We have our own private hash table implementation which is similar - to the one in cpphash.c (actually, it's a further refinement of - that code). libiberty's hashtab.c is not used because it requires - 100% average space overhead per string, which is unacceptable. - Also, this algorithm is faster. */ + We have our own private hash table implementation. libiberty's + hashtab.c is not used because it requires 100% average space + overhead per string, which is unacceptable. Also, this algorithm + is faster. */ #include "config.h" #include "system.h" #include "ggc.h" #include "tree.h" -#include "obstack.h" +#include "hashtable.h" #include "flags.h" #include "toplev.h" +#define IS_FE_IDENT(NODE) (TREE_CODE (NODE) == IDENTIFIER_NODE) + /* The "" allocated string. */ const char empty_string[] = ""; @@ -47,194 +47,33 @@ const char digit_vector[] = { '5', 0, '6', 0, '7', 0, '8', 0, '9', 0 }; +struct ht *ident_hash; static struct obstack string_stack; - -/* Each hashnode is just a pointer to a TREE_IDENTIFIER. */ -typedef struct tree_identifier *sp_hashnode; - -#define SP_EMPTY(NODE) ((NODE) == NULL) -#define SP_LEN(NODE) ((NODE)->length) -#define SP_TREE(NODE) ((tree) NODE) -#define SP_STR(NODE) ((NODE)->pointer) -#define SP_VALID(NODE) (TREE_CODE (SP_TREE (NODE)) == IDENTIFIER_NODE) - -/* This is the hash table structure. There's only one. */ -struct str_hash -{ - sp_hashnode *entries; - size_t nslots; /* total slots in the entries array */ - size_t nelements; /* number of live elements */ - - /* table usage statistics */ - unsigned int searches; - unsigned int collisions; -}; -#define INITIAL_HASHSIZE (16*1024) - -static struct str_hash string_hash = { 0, INITIAL_HASHSIZE, 0, 0, 0 }; - -enum insert_option { INSERT, NO_INSERT }; - -static sp_hashnode alloc_ident PARAMS ((const char *, size_t, - enum insert_option)); -static inline unsigned int calc_hash PARAMS ((const unsigned char *, size_t)); -static void mark_string_hash PARAMS ((void *)); -static void expand_string_table PARAMS ((void)); - -/* Convenience macro for iterating over the hash table. E is set to - each live entry in turn. */ -#define FORALL_IDS(E) \ -for (E = string_hash.entries; E < string_hash.entries+string_hash.nslots; E++) \ - if (!SP_EMPTY (*E) && SP_VALID (*E)) - -/* 0 while creating built-in identifiers. */ static int do_identifier_warnings; +static hashnode alloc_node PARAMS ((hash_table *)); +static int mark_ident PARAMS ((struct cpp_reader *, hashnode, const PTR)); +static void mark_ident_hash PARAMS ((void *)); +static int scan_for_clashes PARAMS ((struct cpp_reader *, hashnode, + const char *)); + /* Initialize the string pool. */ void init_stringpool () { + /* Create with 16K (2^14) entries. */ + ident_hash = ht_create (14); + ident_hash->alloc_node = alloc_node; gcc_obstack_init (&string_stack); - ggc_add_root (&string_hash, 1, sizeof string_hash, mark_string_hash); - - /* Strings need no alignment. */ - obstack_alignment_mask (&string_stack) = 0; - - string_hash.entries = (sp_hashnode *) - xcalloc (string_hash.nslots, sizeof (sp_hashnode)); + ggc_add_root (&ident_hash, 1, sizeof ident_hash, mark_ident_hash); } -/* Enable warnings on similar identifiers (if requested). - Done after the built-in identifiers are created. */ -void -start_identifier_warnings () -{ - do_identifier_warnings = 1; -} - -/* Record the size of an identifier node for the language in use. - SIZE is the total size in bytes. - This is called by the language-specific files. This must be - called before allocating any identifiers. */ -void -set_identifier_size (size) - int size; +/* Allocate a hash node. */ +static hashnode +alloc_node (table) + hash_table *table ATTRIBUTE_UNUSED; { - tree_code_length[(int) IDENTIFIER_NODE] - = (size - sizeof (struct tree_common)) / sizeof (tree); -} - -/* Calculate the hash of the string STR, which is of length LEN. */ -static inline unsigned int -calc_hash (str, len) - const unsigned char *str; - size_t len; -{ - size_t n = len; - unsigned int r = 0; -#define HASHSTEP(r, c) ((r) * 67 + (c - 113)); - - while (n--) - r = HASHSTEP (r, *str++); - - return r + len; -#undef HASHSTEP -} - -/* Internal primitive: returns the header structure for the identifier - of length LENGTH, containing CONTENTS. If that identifier already - exists in the table, returns the existing entry. If the identifier - hasn't been seen before and the last argument is INSERT, inserts - and returns a new entry. Otherwise returns NULL. */ -static sp_hashnode -alloc_ident (contents, length, insert) - const char *contents; - size_t length; - enum insert_option insert; -{ - unsigned int hash = calc_hash ((const unsigned char *)contents, length); - unsigned int hash2; - unsigned int index; - size_t sizemask; - sp_hashnode entry; - - sizemask = string_hash.nslots - 1; - index = hash & sizemask; - - /* hash2 must be odd, so we're guaranteed to visit every possible - location in the table during rehashing. */ - hash2 = ((hash * 17) & sizemask) | 1; - string_hash.searches++; - - for (;;) - { - entry = string_hash.entries[index]; - - if (SP_EMPTY (entry)) - break; - - if ((size_t) SP_LEN (entry) == length - && !memcmp (SP_STR (entry), contents, length)) - return entry; - - index = (index + hash2) & sizemask; - string_hash.collisions++; - } - - if (insert == NO_INSERT) - return NULL; - - entry = (sp_hashnode) make_node (IDENTIFIER_NODE); - string_hash.entries[index] = entry; - SP_STR (entry) = ggc_alloc_string (contents, length); - SP_LEN (entry) = length; - /* This is not yet an identifier. */ - TREE_SET_CODE (entry, ERROR_MARK); - - if (++string_hash.nelements * 4 >= string_hash.nslots * 3) - /* Must expand the string table. */ - expand_string_table (); - - return entry; -} - -/* Subroutine of alloc_ident which doubles the size of the hash table - and rehashes all the strings into the new table. Returns the entry - in the new table corresponding to ENTRY. */ -static void -expand_string_table () -{ - sp_hashnode *nentries; - sp_hashnode *e; - size_t size, sizemask; - - size = string_hash.nslots * 2; - nentries = (sp_hashnode *) xcalloc (size, sizeof (sp_hashnode)); - sizemask = size - 1; - - FORALL_IDS (e) - { - unsigned int index, hash, hash2; - - hash = calc_hash ((const unsigned char *) SP_STR (*e), SP_LEN (*e)); - hash2 = ((hash * 17) & sizemask) | 1; - index = hash & sizemask; - - for (;;) - { - if (SP_EMPTY (nentries[index])) - { - nentries[index] = *e; - break; - } - - index = (index + hash2) & sizemask; - } - } - - free (string_hash.entries); - string_hash.entries = nentries; - string_hash.nslots = size; + return GCC_IDENT_TO_HT_IDENT (make_node (IDENTIFIER_NODE)); } /* Allocate and return a string constant of length LENGTH, containing @@ -260,43 +99,40 @@ ggc_alloc_string (contents, length) return obstack_finish (&string_stack); } +/* NODE is an identifier known to the preprocessor. Make it known to + the front ends as well. */ + +void +make_identifier (node) + tree node; +{ + /* If this identifier is longer than the clash-warning length, + do a brute force search of the entire table for clashes. */ + if (warn_id_clash && do_identifier_warnings + && IDENTIFIER_LENGTH (node) >= id_clash_len) + ht_forall (ident_hash, (ht_cb) scan_for_clashes, + IDENTIFIER_POINTER (node)); + + TREE_SET_CODE (node, IDENTIFIER_NODE); +#ifdef GATHER_STATISTICS + id_string_size += IDENTIFIER_LENGTH (node); +#endif +} + /* Return an IDENTIFIER_NODE whose name is TEXT (a null-terminated string). If an identifier with that name has previously been referred to, the same node is returned this time. */ + tree get_identifier (text) const char *text; { - sp_hashnode node; - size_t length = strlen (text); - - node = alloc_ident (text, length, INSERT); - if (!SP_VALID (node)) - { - /* If this identifier is longer than the clash-warning length, - do a brute force search of the entire table for clashes. */ - if (warn_id_clash && do_identifier_warnings && length >= (size_t) id_clash_len) - { - sp_hashnode *e; - FORALL_IDS (e) - { - if (SP_LEN (*e) >= id_clash_len - && !strncmp (SP_STR (*e), text, id_clash_len)) - { - warning ("\"%s\" and \"%s\" identical in first %d characters", - text, SP_STR (*e), id_clash_len); - break; - } - } - } - - TREE_SET_CODE (node, IDENTIFIER_NODE); -#ifdef GATHER_STATISTICS - id_string_size += length; -#endif - } + hashnode ht_node = ht_lookup (ident_hash, + (const unsigned char *) text, + strlen (text), HT_ALLOC); - return SP_TREE (node); + /* ht_node can't be NULL here. */ + return HT_IDENT_TO_GCC_IDENT (ht_node); } /* If an identifier with the name TEXT (a null-terminated string) has @@ -307,90 +143,92 @@ tree maybe_get_identifier (text) const char *text; { - sp_hashnode node; + hashnode ht_node; + tree node; size_t length = strlen (text); - node = alloc_ident (text, length, NO_INSERT); - if (!SP_EMPTY (node) && SP_VALID (node)) - return SP_TREE (node); + ht_node = ht_lookup (ident_hash, (const unsigned char *) text, + length, HT_NO_INSERT); + if (ht_node) + { + node = HT_IDENT_TO_GCC_IDENT (ht_node); + if (IS_FE_IDENT (node)) + return node; + } return NULL_TREE; } +/* If this identifier is longer than the clash-warning length, + do a brute force search of the entire table for clashes. */ + +static int +scan_for_clashes (pfile, h, text) + struct cpp_reader *pfile ATTRIBUTE_UNUSED; + hashnode h; + const char *text; +{ + tree node = HT_IDENT_TO_GCC_IDENT (h); + + if (IS_FE_IDENT (node) + && IDENTIFIER_LENGTH (node) >= id_clash_len + && !memcmp (IDENTIFIER_POINTER (node), text, id_clash_len)) + { + warning ("\"%s\" and \"%s\" identical in first %d characters", + text, IDENTIFIER_POINTER (node), id_clash_len); + return 0; + } + + return 1; +} + +/* Record the size of an identifier node for the language in use. + SIZE is the total size in bytes. + This is called by the language-specific files. This must be + called before allocating any identifiers. */ + +void +set_identifier_size (size) + int size; +{ + tree_code_length[(int) IDENTIFIER_NODE] + = (size - sizeof (struct tree_common)) / sizeof (tree); +} + +/* Enable warnings on similar identifiers (if requested). + Done after the built-in identifiers are created. */ + +void +start_identifier_warnings () +{ + do_identifier_warnings = 1; +} + /* Report some basic statistics about the string pool. */ void stringpool_statistics () { - size_t nelts, nids, overhead, headers; - size_t total_bytes, longest, sum_of_squares; - double exp_len, exp_len2, exp2_len; - sp_hashnode *e; -#define SCALE(x) ((unsigned long) ((x) < 1024*10 \ - ? (x) \ - : ((x) < 1024*1024*10 \ - ? (x) / 1024 \ - : (x) / (1024*1024)))) -#define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M')) - - total_bytes = longest = sum_of_squares = nids = 0; - FORALL_IDS (e) - { - size_t n = SP_LEN (*e); - - total_bytes += n; - sum_of_squares += n*n; - if (n > longest) - longest = n; - if (SP_VALID (*e)) - nids++; - } - - nelts = string_hash.nelements; - overhead = obstack_memory_used (&string_stack) - total_bytes; - headers = string_hash.nslots * sizeof (sp_hashnode); - - fprintf (stderr, -"\nString pool\n\ -entries\t\t%lu\n\ -identifiers\t%lu (%.2f%%)\n\ -slots\t\t%lu\n\ -bytes\t\t%lu%c (%lu%c overhead)\n\ -table size\t%lu%c\n", - (unsigned long) nelts, - (unsigned long) nids, nids * 100.0 / nelts, - (unsigned long) string_hash.nslots, - SCALE (total_bytes), LABEL (total_bytes), - SCALE (overhead), LABEL (overhead), - SCALE (headers), LABEL (headers)); - - exp_len = (double)total_bytes / (double)nelts; - exp2_len = exp_len * exp_len; - exp_len2 = (double)sum_of_squares / (double)nelts; - - fprintf (stderr, -"coll/search\t%.4f\n\ -ins/search\t%.4f\n\ -avg. entry\t%.2f bytes (+/- %.2f)\n\ -longest entry\t%lu\n", - (double) string_hash.collisions / (double) string_hash.searches, - (double) nelts / (double) string_hash.searches, - exp_len, approx_sqrt (exp_len2 - exp2_len), - (unsigned long) longest); -#undef SCALE -#undef LABEL + ht_dump_statistics (ident_hash); } -/* Mark the string hash for GC. */ +/* Mark an identifier for GC. */ -static void -mark_string_hash (arg) - void *arg ATTRIBUTE_UNUSED; +static int +mark_ident (pfile, h, v) + struct cpp_reader *pfile ATTRIBUTE_UNUSED; + hashnode h; + const PTR v ATTRIBUTE_UNUSED; { - sp_hashnode *h; + ggc_mark_nonnull_tree (HT_IDENT_TO_GCC_IDENT (h)); + return 1; +} - FORALL_IDS (h) - { - ggc_mark_tree (SP_TREE (*h)); - } +/* Mark all identifiers for GC. */ + +static void +mark_ident_hash (arg) + PTR arg ATTRIBUTE_UNUSED; +{ + ht_forall (ident_hash, mark_ident, NULL); } -- cgit v1.2.1