summaryrefslogtreecommitdiff
path: root/scss
diff options
context:
space:
mode:
authorGerman M. Bravo <german.mb@deipi.com>2014-09-22 10:02:57 -0500
committerGerman M. Bravo <german.mb@deipi.com>2014-09-22 10:03:15 -0500
commitbbd3d9b381ff349dc244725abc4748fff587dfc5 (patch)
treefe7cdd6a1701cc56216b406565210b298edba2eb /scss
parent46a0deca048a033f62ed4a341a0318bce72f0e0d (diff)
downloadpyscss-bbd3d9b381ff349dc244725abc4748fff587dfc5.tar.gz
Added hashtable
Diffstat (limited to 'scss')
-rw-r--r--scss/src/_speedups.c1
-rwxr-xr-xscss/src/build.py2
-rw-r--r--scss/src/hashtable.c211
-rw-r--r--scss/src/hashtable.h27
-rw-r--r--scss/src/scanner.c88
-rw-r--r--scss/src/scanner.h8
6 files changed, 280 insertions, 57 deletions
diff --git a/scss/src/_speedups.c b/scss/src/_speedups.c
index f24e95c..510bbd7 100644
--- a/scss/src/_speedups.c
+++ b/scss/src/_speedups.c
@@ -567,7 +567,6 @@ MOD_INIT(_scanner)
Py_INCREF(PyExc_scss_NoMoreTokens);
PyModule_AddObject(m, "NoMoreTokens", (PyObject *)PyExc_scss_NoMoreTokens);
#if PY_MAJOR_VERSION >= 3
-
return m;
#endif
}
diff --git a/scss/src/build.py b/scss/src/build.py
index 2bb771d..dad4ab5 100755
--- a/scss/src/build.py
+++ b/scss/src/build.py
@@ -23,7 +23,7 @@ if len(sys.argv) == 1:
setup(ext_modules=[
Extension(
'_scanner',
- sources=['_speedups.c', 'block_locator.c', 'scanner.c'],
+ sources=['_speedups.c', 'block_locator.c', 'scanner.c', 'hashtable.c'],
libraries=['pcre'],
),
], cmdclass={'build_ext': build_ext})
diff --git a/scss/src/hashtable.c b/scss/src/hashtable.c
new file mode 100644
index 0000000..1a56aea
--- /dev/null
+++ b/scss/src/hashtable.c
@@ -0,0 +1,211 @@
+/*
+* [https://gist.github.com/tonious/1377667]
+*
+* Public Domain Hashtable
+* Copyright (c) 2011 Tony Thompson (tonious).
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <limits.h>
+#include <string.h>
+
+#include "hashtable.h"
+
+static unsigned int
+murmurhash3(const void *key, const size_t len, const unsigned int seed) {
+ /* MurmurHash3, by Austin Appleby, also in the public domain */
+
+ const unsigned int c1 = 0xcc9e2d51;
+ const unsigned int c2 = 0x1b873593;
+ const unsigned int r1 = 15;
+ const unsigned int r2 = 13;
+ const unsigned int m = 5;
+ const unsigned int n = 0xe6546b64;
+
+ unsigned int hash = seed;
+
+ const size_t nblocks = len / 4;
+ const unsigned int *blocks = (const unsigned int *) key;
+ size_t i;
+
+ for (i = 0; i < nblocks; i++) {
+ unsigned int k = blocks[i];
+ k *= c1;
+ k = (k << r1) | (k >> (32 - r1));
+ k *= c2;
+
+ hash ^= k;
+ hash = ((hash << r2) | (hash >> (32 - r2))) * m + n;
+ }
+
+ const unsigned char *tail = (const unsigned char *) (key + nblocks * 4);
+ unsigned int k1 = 0;
+
+ switch (len & 3) {
+ case 3:
+ k1 ^= tail[2] << 16;
+ case 2:
+ k1 ^= tail[1] << 8;
+ case 1:
+ k1 ^= tail[0];
+
+ k1 *= c1;
+ k1 = (k1 << r1) | (k1 >> (32 - r1));
+ k1 *= c2;
+ hash ^= k1;
+ }
+
+ hash ^= len;
+ hash ^= (hash >> 16);
+ hash *= 0x85ebca6b;
+ hash ^= (hash >> 13);
+ hash *= 0xc2b2ae35;
+ hash ^= (hash >> 16);
+
+ return hash;
+}
+
+Hashtable *
+Hashtable_create(const unsigned int size) {
+ /* Create a new hashtable */
+
+ unsigned int i;
+ Hashtable *hashtable = NULL;
+
+ if (size < 1) return NULL;
+
+ /* Allocate the table itself. */
+ if ((hashtable = malloc(sizeof(Hashtable))) == NULL) {
+ return NULL;
+ }
+
+ /* Allocate pointers to the head nodes. */
+ if ((hashtable->table = malloc(sizeof(Entry *) * size)) == NULL) {
+ return NULL;
+ }
+ for (i = 0; i < size; i++) {
+ hashtable->table[i] = NULL;
+ }
+
+ hashtable->size = size;
+
+ return hashtable;
+}
+
+void
+Hashtable_del(Hashtable *hashtable) {
+ int bin;
+ Entry *next = NULL;
+ Entry *last = NULL;
+
+ for (bin = 0; bin < hashtable->size; bin++) {
+ next = hashtable->table[bin];
+
+ while (next != NULL) {
+ last = next;
+ next = next->next;
+ if (last->key != NULL) {
+ free(last->key);
+ }
+ free(last);
+ }
+ }
+ free(hashtable->table);
+ free(hashtable);
+}
+
+static unsigned int
+_Hashtable_hash(Hashtable *hashtable, const char *key, const size_t len) {
+ /* Hash a string for a particular hash table */
+
+ unsigned int hash = murmurhash3(key, len, 0x9747b28c);
+
+ return hash % hashtable->size;
+}
+
+static Entry *
+_Hashtable_newpair(const char *key, const size_t len, void *value) {
+ /* Create a key-value pair */
+
+ Entry *newpair;
+
+ if ((newpair = malloc(sizeof(Entry))) == NULL) {
+ return NULL;
+ }
+
+ if ((newpair->key = memcpy(malloc(len), key, len)) == NULL) {
+ return NULL;
+ }
+
+ newpair->value = value;
+
+ newpair->next = NULL;
+
+ return newpair;
+}
+
+void Hashtable_set(Hashtable *hashtable, const char *key, void *value) {
+ /* Insert a key-value pair into a hash table */
+
+ int bin = 0;
+ const size_t len = strlen(key) + 1;
+ Entry *newpair = NULL;
+ Entry *next = NULL;
+ Entry *last = NULL;
+
+ bin = _Hashtable_hash(hashtable, key, len);
+
+ next = hashtable->table[bin];
+
+ while (next != NULL && next->key != NULL && strcmp(key, next->key) > 0) {
+ last = next;
+ next = next->next;
+ }
+
+ if( next != NULL && next->key != NULL && strcmp( key, next->key ) == 0 ) {
+ /* There's already a pair. Let's replace that value. */
+ next->value = value;
+
+ } else {
+ /* Nope, could't find it. Time to grow a pair. */
+ newpair = _Hashtable_newpair(key, len, value);
+
+ /* We're at the start of the linked list in this bin. */
+ if (next == hashtable->table[bin]) {
+ newpair->next = next;
+ hashtable->table[bin] = newpair;
+
+ /* We're at the end of the linked list in this bin. */
+ } else if (next == NULL) {
+ last->next = newpair;
+
+ /* We're in the middle of the list. */
+ } else {
+ newpair->next = next;
+ last->next = newpair;
+ }
+ }
+}
+
+void *Hashtable_get(Hashtable *hashtable, const char *key) {
+ /* Retrieve a key-value pair from a hash table */
+
+ int bin = 0;
+ Entry *pair;
+
+ bin = _Hashtable_hash(hashtable, key, strlen(key) + 1);
+
+ /* Step through the bin, looking for our value. */
+ pair = hashtable->table[bin];
+ while (pair != NULL && pair->key != NULL && strcmp(key, pair->key) > 0) {
+ pair = pair->next;
+ }
+
+ /* Did we actually find anything? */
+ if (pair == NULL || pair->key == NULL || strcmp(key, pair->key) != 0) {
+ return 0;
+ } else {
+ return pair->value;
+ }
+}
diff --git a/scss/src/hashtable.h b/scss/src/hashtable.h
new file mode 100644
index 0000000..ff6e326
--- /dev/null
+++ b/scss/src/hashtable.h
@@ -0,0 +1,27 @@
+/*
+* [https://gist.github.com/tonious/1377667]
+*
+* Public Domain Hashtable
+* Copyright (c) 2011 Tony Thompson (tonious).
+*/
+
+#ifndef HASHTABLE_H
+#define HASHTABLE_H
+
+typedef struct Entry_s {
+ char *key;
+ void *value;
+ struct Entry_s *next;
+} Entry;
+
+typedef struct {
+ int size;
+ Entry **table;
+} Hashtable;
+
+Hashtable *Hashtable_create(const unsigned int size);
+void Hashtable_del(Hashtable *hashtable);
+void Hashtable_set(Hashtable *hashtable, const char *key, void *value);
+void *Hashtable_get(Hashtable *hashtable, const char *key);
+
+#endif
diff --git a/scss/src/scanner.c b/scss/src/scanner.c
index 1f8ac8a..56e4fbc 100644
--- a/scss/src/scanner.c
+++ b/scss/src/scanner.c
@@ -150,7 +150,7 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz)
Token best_token, *p_token;
Restriction *p_restriction;
Pattern *regex;
- int j, k, max, skip;
+ int j, k, skip;
#ifdef DEBUG
fprintf(stderr, "%s\n", __PRETTY_FUNCTION__);
@@ -159,6 +159,7 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz)
while (1) {
regex = NULL;
best_token.regex = NULL;
+
/* Search the patterns for a match, with earlier
tokens in the list having preference */
for (j = 0; j < Pattern_patterns_sz; j++) {
@@ -166,26 +167,24 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz)
#ifdef DEBUG
fprintf(stderr, "\tTrying %s: %s at pos %d -> %s\n", repr(regex->tok), repr(regex->expr), self->pos, repr(self->input));
#endif
+
/* First check to see if we're restricting to this token */
- skip = restrictions_sz;
- if (skip) {
- max = (restrictions_sz > self->ignore_sz) ? restrictions_sz : self->ignore_sz;
- for (k = 0; k < max; k++) {
- if (k < restrictions_sz && strcmp(regex->tok, restrictions[k].tok) == 0) {
- skip = 0;
- break;
+ if (restrictions_sz) {
+ if (Hashtable_get(self->ignore, regex->tok) == NULL) {
+ skip = 1;
+ for (k = 0; k < restrictions_sz; k++) {
+ if (strcmp(restrictions[k].tok, regex->tok) == 0) {
+ skip = 0;
+ break;
+ }
}
- if (k < self->ignore_sz && regex == self->ignore[k]) {
- skip = 0;
- break;
+ if (skip) {
+ continue;
+ #ifdef DEBUG
+ fprintf(stderr, "\tSkipping %s!\n", repr(regex->tok));
+ #endif
}
}
- if (skip) {
- continue;
- #ifdef DEBUG
- fprintf(stderr, "\tSkipping %s!\n", repr(regex->tok));
- #endif
- }
}
if (Pattern_match(
regex,
@@ -200,6 +199,7 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz)
break;
}
}
+
/* If we didn't find anything, raise an error */
if (best_token.regex == NULL) {
if (restrictions_sz) {
@@ -207,20 +207,16 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz)
}
return SCANNER_EXC_BAD_TOKEN;
}
+
/* If we found something that isn't to be ignored, return it */
- skip = 0;
- for (k = 0; k < self->ignore_sz; k++) {
- if (best_token.regex == self->ignore[k]) {
- /* This token should be ignored... */
- self->pos += best_token.string_sz;
- skip = 1;
- break;
- }
- }
- if (!skip) {
+ if (Hashtable_get(self->ignore, best_token.regex->tok) == NULL) {
break;
}
+
+ /* This token should be ignored... */
+ self->pos += best_token.string_sz;
}
+
if (best_token.regex) {
self->pos = (int)(best_token.string - self->input + best_token.string_sz);
/* Only add this token if it's not in the list (to prevent looping) */
@@ -239,17 +235,15 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz)
memcpy(&self->tokens[self->tokens_sz], &best_token, sizeof(Token));
p_restriction = &self->restrictions[self->tokens_sz];
if (restrictions_sz) {
- p_restriction->patterns = PyMem_New(Pattern *, restrictions_sz);
- p_restriction->patterns_sz = 0;
+ p_restriction->patterns = Hashtable_create(1024);
for (j = 0; j < restrictions_sz; j++) {
regex = Pattern_regex(restrictions[j].tok, restrictions[j].expr);
if (regex) {
- p_restriction->patterns[p_restriction->patterns_sz++] = regex;
+ Hashtable_set(p_restriction->patterns, restrictions[j].tok, regex);
}
}
} else {
p_restriction->patterns = NULL;
- p_restriction->patterns_sz = 0;
}
self->tokens_sz++;
return 1;
@@ -270,9 +264,8 @@ Scanner_reset(Scanner *self, char *input, int input_sz) {
#endif
for (i = 0; i < self->tokens_sz; i++) {
- PyMem_Del(self->restrictions[i].patterns);
+ Hashtable_del(self->restrictions[i].patterns);
self->restrictions[i].patterns = NULL;
- self->restrictions[i].patterns_sz = 0;
}
self->tokens_sz = 0;
@@ -297,12 +290,12 @@ Scanner_del(Scanner *self) {
#endif
if (self->ignore != NULL) {
- PyMem_Del(self->ignore);
+ Hashtable_del(self->ignore);
}
if (self->tokens != NULL) {
for (i = 0; i < self->tokens_sz; i++) {
- PyMem_Del(self->restrictions[i].patterns);
+ Hashtable_del(self->restrictions[i].patterns);
}
PyMem_Del(self->tokens);
PyMem_Del(self->restrictions);
@@ -329,6 +322,7 @@ Scanner_new(Pattern patterns[], int patterns_sz, Pattern ignore[], int ignore_sz
self = PyMem_New(Scanner, 1);
memset(self, 0, sizeof(Scanner));
if (self) {
+ /* Initialize patterns */
for (i = 0; i < patterns_sz; i++) {
regex = Pattern_regex(patterns[i].tok, patterns[i].expr);
#ifdef DEBUG
@@ -337,12 +331,13 @@ Scanner_new(Pattern patterns[], int patterns_sz, Pattern ignore[], int ignore_sz
}
#endif
}
+ /* Initialize ignored */
if (ignore_sz) {
- self->ignore = PyMem_New(Pattern *, ignore_sz);
+ self->ignore = Hashtable_create(1024);
for (i = 0; i < ignore_sz; i++) {
regex = Pattern_regex(ignore[i].tok, ignore[i].expr);
if (regex) {
- self->ignore[self->ignore_sz++] = regex;
+ Hashtable_set(self->ignore, ignore[i].tok, regex);
#ifdef DEBUG
fprintf(stderr, "\tIgnoring token %s\n", repr(regex->tok));
#endif
@@ -385,7 +380,7 @@ Scanner_finalize(void)
Token*
Scanner_token(Scanner *self, int i, Pattern restrictions[], int restrictions_sz)
{
- int j, k, found;
+ int j;
long result;
#ifdef DEBUG
@@ -398,19 +393,10 @@ Scanner_token(Scanner *self, int i, Pattern restrictions[], int restrictions_sz)
return (Token *)result;
}
} else if (i >= 0 && i < self->tokens_sz) {
- if (self->restrictions[i].patterns_sz) {
- for (j = 0; j < restrictions_sz; j++) {
- found = 0;
- for (k = 0; k < self->restrictions[i].patterns_sz; k++) {
- if (strcmp(restrictions[j].tok, self->restrictions[i].patterns[k]->tok) == 0) {
- found = 1;
- break;
- }
- }
- if (!found) {
- sprintf(self->exc, "Unimplemented: restriction set changed");
- return (Token *)SCANNER_EXC_UNIMPLEMENTED;
- }
+ for (j = 0; j < restrictions_sz; j++) {
+ if (Hashtable_get(self->restrictions[i].patterns, restrictions[j].tok) == NULL) {
+ sprintf(self->exc, "Unimplemented: restriction set changed");
+ return (Token *)SCANNER_EXC_UNIMPLEMENTED;
}
}
}
diff --git a/scss/src/scanner.h b/scss/src/scanner.h
index df9fb62..d4b7e15 100644
--- a/scss/src/scanner.h
+++ b/scss/src/scanner.h
@@ -11,6 +11,8 @@
#ifndef SCANNER_H
#define SCANNER_H
+#include "hashtable.h"
+
#define PCRE_STATIC
#include <pcre.h>
@@ -37,14 +39,12 @@ typedef struct {
} Token;
typedef struct {
- int patterns_sz;
- Pattern **patterns;
+ Hashtable *patterns;
} Restriction;
typedef struct {
char exc[MAX_EXC_STRING];
- int ignore_sz;
- Pattern **ignore;
+ Hashtable *ignore;
int tokens_sz;
int tokens_bsz;
Token *tokens;