diff options
author | German M. Bravo <german.mb@deipi.com> | 2014-09-22 10:02:57 -0500 |
---|---|---|
committer | German M. Bravo <german.mb@deipi.com> | 2014-09-22 10:03:15 -0500 |
commit | bbd3d9b381ff349dc244725abc4748fff587dfc5 (patch) | |
tree | fe7cdd6a1701cc56216b406565210b298edba2eb | |
parent | 46a0deca048a033f62ed4a341a0318bce72f0e0d (diff) | |
download | pyscss-bbd3d9b381ff349dc244725abc4748fff587dfc5.tar.gz |
Added hashtable
-rw-r--r-- | scss/src/_speedups.c | 1 | ||||
-rwxr-xr-x | scss/src/build.py | 2 | ||||
-rw-r--r-- | scss/src/hashtable.c | 211 | ||||
-rw-r--r-- | scss/src/hashtable.h | 27 | ||||
-rw-r--r-- | scss/src/scanner.c | 88 | ||||
-rw-r--r-- | scss/src/scanner.h | 8 | ||||
-rw-r--r-- | setup.py | 2 |
7 files changed, 281 insertions, 58 deletions
diff --git a/scss/src/_speedups.c b/scss/src/_speedups.c index f24e95c..510bbd7 100644 --- a/scss/src/_speedups.c +++ b/scss/src/_speedups.c @@ -567,7 +567,6 @@ MOD_INIT(_scanner) Py_INCREF(PyExc_scss_NoMoreTokens); PyModule_AddObject(m, "NoMoreTokens", (PyObject *)PyExc_scss_NoMoreTokens); #if PY_MAJOR_VERSION >= 3 - return m; #endif } diff --git a/scss/src/build.py b/scss/src/build.py index 2bb771d..dad4ab5 100755 --- a/scss/src/build.py +++ b/scss/src/build.py @@ -23,7 +23,7 @@ if len(sys.argv) == 1: setup(ext_modules=[ Extension( '_scanner', - sources=['_speedups.c', 'block_locator.c', 'scanner.c'], + sources=['_speedups.c', 'block_locator.c', 'scanner.c', 'hashtable.c'], libraries=['pcre'], ), ], cmdclass={'build_ext': build_ext}) diff --git a/scss/src/hashtable.c b/scss/src/hashtable.c new file mode 100644 index 0000000..1a56aea --- /dev/null +++ b/scss/src/hashtable.c @@ -0,0 +1,211 @@ +/* +* [https://gist.github.com/tonious/1377667] +* +* Public Domain Hashtable +* Copyright (c) 2011 Tony Thompson (tonious). +*/ + +#include <stdlib.h> +#include <stdio.h> +#include <limits.h> +#include <string.h> + +#include "hashtable.h" + +static unsigned int +murmurhash3(const void *key, const size_t len, const unsigned int seed) { + /* MurmurHash3, by Austin Appleby, also in the public domain */ + + const unsigned int c1 = 0xcc9e2d51; + const unsigned int c2 = 0x1b873593; + const unsigned int r1 = 15; + const unsigned int r2 = 13; + const unsigned int m = 5; + const unsigned int n = 0xe6546b64; + + unsigned int hash = seed; + + const size_t nblocks = len / 4; + const unsigned int *blocks = (const unsigned int *) key; + size_t i; + + for (i = 0; i < nblocks; i++) { + unsigned int k = blocks[i]; + k *= c1; + k = (k << r1) | (k >> (32 - r1)); + k *= c2; + + hash ^= k; + hash = ((hash << r2) | (hash >> (32 - r2))) * m + n; + } + + const unsigned char *tail = (const unsigned char *) (key + nblocks * 4); + unsigned int k1 = 0; + + switch (len & 3) { + case 3: + k1 ^= tail[2] << 16; + case 2: + k1 ^= tail[1] << 8; + case 1: + k1 ^= tail[0]; + + k1 *= c1; + k1 = (k1 << r1) | (k1 >> (32 - r1)); + k1 *= c2; + hash ^= k1; + } + + hash ^= len; + hash ^= (hash >> 16); + hash *= 0x85ebca6b; + hash ^= (hash >> 13); + hash *= 0xc2b2ae35; + hash ^= (hash >> 16); + + return hash; +} + +Hashtable * +Hashtable_create(const unsigned int size) { + /* Create a new hashtable */ + + unsigned int i; + Hashtable *hashtable = NULL; + + if (size < 1) return NULL; + + /* Allocate the table itself. */ + if ((hashtable = malloc(sizeof(Hashtable))) == NULL) { + return NULL; + } + + /* Allocate pointers to the head nodes. */ + if ((hashtable->table = malloc(sizeof(Entry *) * size)) == NULL) { + return NULL; + } + for (i = 0; i < size; i++) { + hashtable->table[i] = NULL; + } + + hashtable->size = size; + + return hashtable; +} + +void +Hashtable_del(Hashtable *hashtable) { + int bin; + Entry *next = NULL; + Entry *last = NULL; + + for (bin = 0; bin < hashtable->size; bin++) { + next = hashtable->table[bin]; + + while (next != NULL) { + last = next; + next = next->next; + if (last->key != NULL) { + free(last->key); + } + free(last); + } + } + free(hashtable->table); + free(hashtable); +} + +static unsigned int +_Hashtable_hash(Hashtable *hashtable, const char *key, const size_t len) { + /* Hash a string for a particular hash table */ + + unsigned int hash = murmurhash3(key, len, 0x9747b28c); + + return hash % hashtable->size; +} + +static Entry * +_Hashtable_newpair(const char *key, const size_t len, void *value) { + /* Create a key-value pair */ + + Entry *newpair; + + if ((newpair = malloc(sizeof(Entry))) == NULL) { + return NULL; + } + + if ((newpair->key = memcpy(malloc(len), key, len)) == NULL) { + return NULL; + } + + newpair->value = value; + + newpair->next = NULL; + + return newpair; +} + +void Hashtable_set(Hashtable *hashtable, const char *key, void *value) { + /* Insert a key-value pair into a hash table */ + + int bin = 0; + const size_t len = strlen(key) + 1; + Entry *newpair = NULL; + Entry *next = NULL; + Entry *last = NULL; + + bin = _Hashtable_hash(hashtable, key, len); + + next = hashtable->table[bin]; + + while (next != NULL && next->key != NULL && strcmp(key, next->key) > 0) { + last = next; + next = next->next; + } + + if( next != NULL && next->key != NULL && strcmp( key, next->key ) == 0 ) { + /* There's already a pair. Let's replace that value. */ + next->value = value; + + } else { + /* Nope, could't find it. Time to grow a pair. */ + newpair = _Hashtable_newpair(key, len, value); + + /* We're at the start of the linked list in this bin. */ + if (next == hashtable->table[bin]) { + newpair->next = next; + hashtable->table[bin] = newpair; + + /* We're at the end of the linked list in this bin. */ + } else if (next == NULL) { + last->next = newpair; + + /* We're in the middle of the list. */ + } else { + newpair->next = next; + last->next = newpair; + } + } +} + +void *Hashtable_get(Hashtable *hashtable, const char *key) { + /* Retrieve a key-value pair from a hash table */ + + int bin = 0; + Entry *pair; + + bin = _Hashtable_hash(hashtable, key, strlen(key) + 1); + + /* Step through the bin, looking for our value. */ + pair = hashtable->table[bin]; + while (pair != NULL && pair->key != NULL && strcmp(key, pair->key) > 0) { + pair = pair->next; + } + + /* Did we actually find anything? */ + if (pair == NULL || pair->key == NULL || strcmp(key, pair->key) != 0) { + return 0; + } else { + return pair->value; + } +} diff --git a/scss/src/hashtable.h b/scss/src/hashtable.h new file mode 100644 index 0000000..ff6e326 --- /dev/null +++ b/scss/src/hashtable.h @@ -0,0 +1,27 @@ +/* +* [https://gist.github.com/tonious/1377667] +* +* Public Domain Hashtable +* Copyright (c) 2011 Tony Thompson (tonious). +*/ + +#ifndef HASHTABLE_H +#define HASHTABLE_H + +typedef struct Entry_s { + char *key; + void *value; + struct Entry_s *next; +} Entry; + +typedef struct { + int size; + Entry **table; +} Hashtable; + +Hashtable *Hashtable_create(const unsigned int size); +void Hashtable_del(Hashtable *hashtable); +void Hashtable_set(Hashtable *hashtable, const char *key, void *value); +void *Hashtable_get(Hashtable *hashtable, const char *key); + +#endif diff --git a/scss/src/scanner.c b/scss/src/scanner.c index 1f8ac8a..56e4fbc 100644 --- a/scss/src/scanner.c +++ b/scss/src/scanner.c @@ -150,7 +150,7 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz) Token best_token, *p_token; Restriction *p_restriction; Pattern *regex; - int j, k, max, skip; + int j, k, skip; #ifdef DEBUG fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); @@ -159,6 +159,7 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz) while (1) { regex = NULL; best_token.regex = NULL; + /* Search the patterns for a match, with earlier tokens in the list having preference */ for (j = 0; j < Pattern_patterns_sz; j++) { @@ -166,26 +167,24 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz) #ifdef DEBUG fprintf(stderr, "\tTrying %s: %s at pos %d -> %s\n", repr(regex->tok), repr(regex->expr), self->pos, repr(self->input)); #endif + /* First check to see if we're restricting to this token */ - skip = restrictions_sz; - if (skip) { - max = (restrictions_sz > self->ignore_sz) ? restrictions_sz : self->ignore_sz; - for (k = 0; k < max; k++) { - if (k < restrictions_sz && strcmp(regex->tok, restrictions[k].tok) == 0) { - skip = 0; - break; + if (restrictions_sz) { + if (Hashtable_get(self->ignore, regex->tok) == NULL) { + skip = 1; + for (k = 0; k < restrictions_sz; k++) { + if (strcmp(restrictions[k].tok, regex->tok) == 0) { + skip = 0; + break; + } } - if (k < self->ignore_sz && regex == self->ignore[k]) { - skip = 0; - break; + if (skip) { + continue; + #ifdef DEBUG + fprintf(stderr, "\tSkipping %s!\n", repr(regex->tok)); + #endif } } - if (skip) { - continue; - #ifdef DEBUG - fprintf(stderr, "\tSkipping %s!\n", repr(regex->tok)); - #endif - } } if (Pattern_match( regex, @@ -200,6 +199,7 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz) break; } } + /* If we didn't find anything, raise an error */ if (best_token.regex == NULL) { if (restrictions_sz) { @@ -207,20 +207,16 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz) } return SCANNER_EXC_BAD_TOKEN; } + /* If we found something that isn't to be ignored, return it */ - skip = 0; - for (k = 0; k < self->ignore_sz; k++) { - if (best_token.regex == self->ignore[k]) { - /* This token should be ignored... */ - self->pos += best_token.string_sz; - skip = 1; - break; - } - } - if (!skip) { + if (Hashtable_get(self->ignore, best_token.regex->tok) == NULL) { break; } + + /* This token should be ignored... */ + self->pos += best_token.string_sz; } + if (best_token.regex) { self->pos = (int)(best_token.string - self->input + best_token.string_sz); /* Only add this token if it's not in the list (to prevent looping) */ @@ -239,17 +235,15 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz) memcpy(&self->tokens[self->tokens_sz], &best_token, sizeof(Token)); p_restriction = &self->restrictions[self->tokens_sz]; if (restrictions_sz) { - p_restriction->patterns = PyMem_New(Pattern *, restrictions_sz); - p_restriction->patterns_sz = 0; + p_restriction->patterns = Hashtable_create(1024); for (j = 0; j < restrictions_sz; j++) { regex = Pattern_regex(restrictions[j].tok, restrictions[j].expr); if (regex) { - p_restriction->patterns[p_restriction->patterns_sz++] = regex; + Hashtable_set(p_restriction->patterns, restrictions[j].tok, regex); } } } else { p_restriction->patterns = NULL; - p_restriction->patterns_sz = 0; } self->tokens_sz++; return 1; @@ -270,9 +264,8 @@ Scanner_reset(Scanner *self, char *input, int input_sz) { #endif for (i = 0; i < self->tokens_sz; i++) { - PyMem_Del(self->restrictions[i].patterns); + Hashtable_del(self->restrictions[i].patterns); self->restrictions[i].patterns = NULL; - self->restrictions[i].patterns_sz = 0; } self->tokens_sz = 0; @@ -297,12 +290,12 @@ Scanner_del(Scanner *self) { #endif if (self->ignore != NULL) { - PyMem_Del(self->ignore); + Hashtable_del(self->ignore); } if (self->tokens != NULL) { for (i = 0; i < self->tokens_sz; i++) { - PyMem_Del(self->restrictions[i].patterns); + Hashtable_del(self->restrictions[i].patterns); } PyMem_Del(self->tokens); PyMem_Del(self->restrictions); @@ -329,6 +322,7 @@ Scanner_new(Pattern patterns[], int patterns_sz, Pattern ignore[], int ignore_sz self = PyMem_New(Scanner, 1); memset(self, 0, sizeof(Scanner)); if (self) { + /* Initialize patterns */ for (i = 0; i < patterns_sz; i++) { regex = Pattern_regex(patterns[i].tok, patterns[i].expr); #ifdef DEBUG @@ -337,12 +331,13 @@ Scanner_new(Pattern patterns[], int patterns_sz, Pattern ignore[], int ignore_sz } #endif } + /* Initialize ignored */ if (ignore_sz) { - self->ignore = PyMem_New(Pattern *, ignore_sz); + self->ignore = Hashtable_create(1024); for (i = 0; i < ignore_sz; i++) { regex = Pattern_regex(ignore[i].tok, ignore[i].expr); if (regex) { - self->ignore[self->ignore_sz++] = regex; + Hashtable_set(self->ignore, ignore[i].tok, regex); #ifdef DEBUG fprintf(stderr, "\tIgnoring token %s\n", repr(regex->tok)); #endif @@ -385,7 +380,7 @@ Scanner_finalize(void) Token* Scanner_token(Scanner *self, int i, Pattern restrictions[], int restrictions_sz) { - int j, k, found; + int j; long result; #ifdef DEBUG @@ -398,19 +393,10 @@ Scanner_token(Scanner *self, int i, Pattern restrictions[], int restrictions_sz) return (Token *)result; } } else if (i >= 0 && i < self->tokens_sz) { - if (self->restrictions[i].patterns_sz) { - for (j = 0; j < restrictions_sz; j++) { - found = 0; - for (k = 0; k < self->restrictions[i].patterns_sz; k++) { - if (strcmp(restrictions[j].tok, self->restrictions[i].patterns[k]->tok) == 0) { - found = 1; - break; - } - } - if (!found) { - sprintf(self->exc, "Unimplemented: restriction set changed"); - return (Token *)SCANNER_EXC_UNIMPLEMENTED; - } + for (j = 0; j < restrictions_sz; j++) { + if (Hashtable_get(self->restrictions[i].patterns, restrictions[j].tok) == NULL) { + sprintf(self->exc, "Unimplemented: restriction set changed"); + return (Token *)SCANNER_EXC_UNIMPLEMENTED; } } } diff --git a/scss/src/scanner.h b/scss/src/scanner.h index df9fb62..d4b7e15 100644 --- a/scss/src/scanner.h +++ b/scss/src/scanner.h @@ -11,6 +11,8 @@ #ifndef SCANNER_H #define SCANNER_H +#include "hashtable.h" + #define PCRE_STATIC #include <pcre.h> @@ -37,14 +39,12 @@ typedef struct { } Token; typedef struct { - int patterns_sz; - Pattern **patterns; + Hashtable *patterns; } Restriction; typedef struct { char exc[MAX_EXC_STRING]; - int ignore_sz; - Pattern **ignore; + Hashtable *ignore; int tokens_sz; int tokens_bsz; Token *tokens; @@ -30,7 +30,7 @@ speedups = Feature( # include headers in an sdist (since they're typically in /usr/lib) Extension( 'scss.grammar._scanner', - sources=['scss/src/_speedups.c', 'scss/src/block_locator.c', 'scss/src/scanner.c'], + sources=['scss/src/_speedups.c', 'scss/src/block_locator.c', 'scss/src/scanner.c', 'scss/src/hashtable.c'], libraries=['pcre'] ), ], |