diff options
author | Simon Kelley <simon@thekelleys.org.uk> | 2012-01-11 21:31:51 +0000 |
---|---|---|
committer | Simon Kelley <simon@thekelleys.org.uk> | 2012-01-11 21:31:51 +0000 |
commit | 205fafa57767db68a0fb810616d1756a4ce2a1e4 (patch) | |
tree | 051897614fdd54c76ad921cabe2a957a695b2bc8 | |
parent | 8ecfaa4adf10d25d6a69df89fa05fd68a40e9ed2 (diff) | |
download | dnsmasq-2.60test8.tar.gz |
Improve performance when reading large hostfiles.v2.60test8
-rw-r--r-- | src/cache.c | 86 | ||||
-rw-r--r-- | src/config.h | 1 |
2 files changed, 48 insertions, 39 deletions
diff --git a/src/cache.c b/src/cache.c index 9c3664e..0ad0c43 100644 --- a/src/cache.c +++ b/src/cache.c @@ -635,11 +635,12 @@ struct crec *cache_find_by_addr(struct crec *crecp, struct all_addr *addr, } static void add_hosts_entry(struct crec *cache, struct all_addr *addr, int addrlen, - unsigned short flags, int index, int addr_dup) + unsigned short flags, int index, struct crec **rhash) { struct crec *lookup = cache_find_by_name(NULL, cache->name.sname, 0, flags & (F_IPV4 | F_IPV6)); int i, nameexists = 0; struct cname *a; + unsigned int j; /* Remove duplicates in hosts files. */ if (lookup && (lookup->flags & F_HOSTS)) @@ -653,33 +654,41 @@ static void add_hosts_entry(struct crec *cache, struct all_addr *addr, int addrl } /* Ensure there is only one address -> name mapping (first one trumps) - We do this by steam here, first we see if the address is the same as - the last one we saw, which eliminates most in the case of an ad-block - file with thousands of entries for the same address. - Then we search and bail at the first matching address that came from + We do this by steam here, The entries are kept in hash chains, linked + by ->next (which is unused at this point) held in hash buckets in + the array rhash. Note that rhash and the values in ->next are only valid + whilst reading hosts files: the buckets are then freed, and the + ->next pointer used for other things. + + We search and bail at the first matching address that came from a HOSTS file. Since the first host entry gets reverse, we know - then that it must exist without searching exhaustively for it. */ + then that it must exist without searching exhaustively for it. + + This complexity avoids O(n^2) divergent CPU use whilst reading + large (10000 entry) hosts files. */ - if (addr_dup) - flags &= ~F_REVERSE; - else - for (i=0; i<hash_size; i++) + /* hash address */ + for (j = 0, i = 0; i < addrlen; i++) + j += ((unsigned char *)addr)[i] + (j << 6) + (j << 16) - j; + + for (lookup = rhash[j % RHASHSIZE]; lookup; lookup = lookup->next) + if ((lookup->flags & F_HOSTS) && + (lookup->flags & flags & (F_IPV4 | F_IPV6)) && + memcmp(&lookup->addr.addr, addr, addrlen) == 0) { - for (lookup = hash_table[i]; lookup; lookup = lookup->hash_next) - if ((lookup->flags & F_HOSTS) && - (lookup->flags & flags & (F_IPV4 | F_IPV6)) && - memcmp(&lookup->addr.addr, addr, addrlen) == 0) - { - flags &= ~F_REVERSE; - break; - } - if (lookup) - break; + flags &= ~F_REVERSE; + break; } cache->flags = flags; cache->uid = index; + + /* maintain address has chain */ + cache->next = rhash[j % RHASHSIZE]; + rhash[j % RHASHSIZE] = cache; + memcpy(&cache->addr.addr, addr, addrlen); + cache_hash(cache); /* don't need to do alias stuff for second and subsequent addresses. */ @@ -743,14 +752,14 @@ static int gettok(FILE *f, char *token) } } -static int read_hostsfile(char *filename, int index, int cache_size) +static int read_hostsfile(char *filename, int index, int cache_size, struct crec **rhash) { FILE *f = fopen(filename, "r"); char *token = daemon->namebuff, *domain_suffix = NULL; int addr_count = 0, name_count = cache_size, lineno = 0; - unsigned short flags = 0, saved_flags = 0; - struct all_addr addr, saved_addr; - int atnl, addrlen = 0, addr_dup; + unsigned short flags = 0; + struct all_addr addr; + int atnl, addrlen = 0; if (!f) { @@ -762,7 +771,6 @@ static int read_hostsfile(char *filename, int index, int cache_size) while ((atnl = gettok(f, token)) != EOF) { - addr_dup = 0; lineno++; #ifdef HAVE_IPV6 @@ -794,14 +802,6 @@ static int read_hostsfile(char *filename, int index, int cache_size) continue; } - if (saved_flags == flags && memcmp(&addr, &saved_addr, addrlen) == 0) - addr_dup = 1; - else - { - saved_flags = flags; - saved_addr = addr; - } - addr_count++; /* rehash every 1000 names. */ @@ -832,14 +832,13 @@ static int read_hostsfile(char *filename, int index, int cache_size) strcpy(cache->name.sname, canon); strcat(cache->name.sname, "."); strcat(cache->name.sname, domain_suffix); - add_hosts_entry(cache, &addr, addrlen, flags, index, addr_dup); - addr_dup = 1; + add_hosts_entry(cache, &addr, addrlen, flags, index, rhash); name_count++; } if ((cache = whine_malloc(sizeof(struct crec) + strlen(canon)+1-SMALLDNAME))) { strcpy(cache->name.sname, canon); - add_hosts_entry(cache, &addr, addrlen, flags, index, addr_dup); + add_hosts_entry(cache, &addr, addrlen, flags, index, rhash); name_count++; } free(canon); @@ -863,6 +862,7 @@ void cache_reload(void) struct crec *cache, **up, *tmp; int i, total_size = daemon->cachesize; struct hostsfile *ah; + struct crec **reverse_hash; cache_inserted = cache_live_freed = 0; @@ -895,14 +895,22 @@ void cache_reload(void) my_syslog(LOG_INFO, _("cleared cache")); return; } - + + if (!(reverse_hash = whine_malloc(sizeof(struct crec *) * RHASHSIZE))) + return; + + for (i = 0; i < RHASHSIZE; i++) + reverse_hash[i] = NULL; + if (!option_bool(OPT_NO_HOSTS)) - total_size = read_hostsfile(HOSTSFILE, 0, total_size); + total_size = read_hostsfile(HOSTSFILE, 0, total_size, reverse_hash); daemon->addn_hosts = expand_filelist(daemon->addn_hosts); for (ah = daemon->addn_hosts; ah; ah = ah->next) if (!(ah->flags & AH_INACTIVE)) - total_size = read_hostsfile(ah->fname, ah->index, total_size); + total_size = read_hostsfile(ah->fname, ah->index, total_size, reverse_hash); + + free(reverse_hash); } char *get_domain(struct in_addr addr) diff --git a/src/config.h b/src/config.h index 3aecf0b..779776c 100644 --- a/src/config.h +++ b/src/config.h @@ -15,6 +15,7 @@ */ #define FTABSIZ 150 /* max number of outstanding requests (default) */ +#define RHASHSIZE 1024 /* hash buckets for address lookup during hostfile read */ #define MAX_PROCS 20 /* max no children for TCP requests */ #define CHILD_LIFETIME 150 /* secs 'till terminated (RFC1035 suggests > 120s) */ #define EDNS_PKTSZ 4096 /* default max EDNS.0 UDP packet from RFC5625 */ |