summaryrefslogtreecommitdiff
path: root/hashmap.h
diff options
context:
space:
mode:
authorJeff Hostetler <jeffhost@microsoft.com>2017-09-06 15:43:48 +0000
committerJunio C Hamano <gitster@pobox.com>2017-09-07 09:42:02 +0900
commit8b604d19515c4be18403047045faa363d4de217b (patch)
tree5bb6fb5e265c3f46c0a08b498c6ee2159dc326cc /hashmap.h
parent238e487ea943f80734cc6dad665e7238b8cbc7ff (diff)
downloadgit-8b604d19515c4be18403047045faa363d4de217b.tar.gz
hashmap: add API to disable item counting when threaded (jh/hashmap-disable-counting)
This is to address concerns raised by ThreadSanitizer on the mailing list about threaded unprotected R/W access to map.size with my previous "disallow rehash" change (0607e10009ee4e37cb49b4cec8d28a9dda1656a4). See: https://public-inbox.org/git/adb37b70139fd1e2bac18bfd22c8b96683ae18eb.1502780344.git.martin.agren@gmail.com/ Add API to hashmap to disable item counting and thus automatic rehashing. Also include API to later re-enable them. When item counting is disabled, the map.size field is invalid. So to prevent accidents, the field has been renamed and an accessor function hashmap_get_size() has been added. All direct references to this field have been updated. And the name of the field changed to map.private_size to communicate this. Here is the relevant output from ThreadSanitizer showing the problem: WARNING: ThreadSanitizer: data race (pid=10554) Read of size 4 at 0x00000082d488 by thread T2 (mutexes: write M16): #0 hashmap_add hashmap.c:209 #1 hash_dir_entry_with_parent_and_prefix name-hash.c:302 #2 handle_range_dir name-hash.c:347 #3 handle_range_1 name-hash.c:415 #4 lazy_dir_thread_proc name-hash.c:471 #5 <null> <null> Previous write of size 4 at 0x00000082d488 by thread T1 (mutexes: write M31): #0 hashmap_add hashmap.c:209 #1 hash_dir_entry_with_parent_and_prefix name-hash.c:302 #2 handle_range_dir name-hash.c:347 #3 handle_range_1 name-hash.c:415 #4 handle_range_dir name-hash.c:380 #5 handle_range_1 name-hash.c:415 #6 lazy_dir_thread_proc name-hash.c:471 #7 <null> <null> Martin gives instructions for running TSan on test t3008 in this post: https://public-inbox.org/git/CAN0heSoJDL9pWELD6ciLTmWf-a=oyxe4EXXOmCKvsG5MSuzxsA@mail.gmail.com/ Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'hashmap.h')
-rw-r--r--hashmap.h72
1 files changed, 51 insertions, 21 deletions
diff --git a/hashmap.h b/hashmap.h
index 7a8fa7fa3d..7cb29a6aed 100644
--- a/hashmap.h
+++ b/hashmap.h
@@ -183,7 +183,7 @@ struct hashmap {
const void *cmpfn_data;
/* total number of entries (0 means the hashmap is empty) */
- unsigned int size;
+ unsigned int private_size; /* use hashmap_get_size() */
/*
* tablesize is the allocated size of the hash table. A non-0 value
@@ -196,8 +196,7 @@ struct hashmap {
unsigned int grow_at;
unsigned int shrink_at;
- /* See `hashmap_disallow_rehash`. */
- unsigned disallow_rehash : 1;
+ unsigned int do_count_items : 1;
};
/* hashmap functions */
@@ -253,6 +252,18 @@ static inline void hashmap_entry_init(void *entry, unsigned int hash)
}
/*
+ * Return the number of items in the map.
+ */
+static inline unsigned int hashmap_get_size(struct hashmap *map)
+{
+ if (map->do_count_items)
+ return map->private_size;
+
+ BUG("hashmap_get_size: size not set");
+ return 0;
+}
+
+/*
* Returns the hashmap entry for the specified key, or NULL if not found.
*
* `map` is the hashmap structure.
@@ -345,24 +356,6 @@ extern void *hashmap_remove(struct hashmap *map, const void *key,
int hashmap_bucket(const struct hashmap *map, unsigned int hash);
/*
- * Disallow/allow rehashing of the hashmap.
- * This is useful if the caller knows that the hashmap needs multi-threaded
- * access. The caller is still required to guard/lock searches and inserts
- * in a manner appropriate to their usage. This simply prevents the table
- * from being unexpectedly re-mapped.
- *
- * It is up to the caller to ensure that the hashmap is initialized to a
- * reasonable size to prevent poor performance.
- *
- * A call to allow rehashing does not force a rehash; that might happen
- * with the next insert or delete.
- */
-static inline void hashmap_disallow_rehash(struct hashmap *map, unsigned value)
-{
- map->disallow_rehash = value;
-}
-
-/*
* Used to iterate over all entries of a hashmap. Note that it is
* not safe to add or remove entries to the hashmap while
* iterating.
@@ -387,6 +380,43 @@ static inline void *hashmap_iter_first(struct hashmap *map,
return hashmap_iter_next(iter);
}
+/*
+ * Disable item counting and automatic rehashing when adding/removing items.
+ *
+ * Normally, the hashmap keeps track of the number of items in the map
+ * and uses it to dynamically resize it. This (both the counting and
+ * the resizing) can cause problems when the map is being used by
+ * threaded callers (because the hashmap code does not know about the
+ * locking strategy used by the threaded callers and therefore, does
+ * not know how to protect the "private_size" counter).
+ */
+static inline void hashmap_disable_item_counting(struct hashmap *map)
+{
+ map->do_count_items = 0;
+}
+
+/*
+ * Re-enable item counting when adding/removing items.
+ * If counting is currently disabled, it will force count them.
+ * It WILL NOT automatically rehash them.
+ */
+static inline void hashmap_enable_item_counting(struct hashmap *map)
+{
+ void *item;
+ unsigned int n = 0;
+ struct hashmap_iter iter;
+
+ if (map->do_count_items)
+ return;
+
+ hashmap_iter_init(map, &iter);
+ while ((item = hashmap_iter_next(&iter)))
+ n++;
+
+ map->do_count_items = 1;
+ map->private_size = n;
+}
+
/* String interning */
/*