Scanner updated (grammar-test)

author: German M. Bravo <german.mb@deipi.com> 2013-08-15 17:07:27 -0500
committer: German M. Bravo <german.mb@deipi.com> 2013-08-16 10:16:22 -0500
commit: 7f8f00988e6c58d9d49d6e4c973ee177345b46b0 (patch)
tree: 2351271978f311f9b0c07ff6ce9a20d6774961d6
parent: 0416b034d8e80e134b307515ce2f755abcc691eb (diff)
download: pyscss-grammar-test.tar.gz
4 files changed, 87 insertions, 227 deletions
diff --git a/scss/src/_speedups.c b/scss/src/_speedups.c
index b4b990c..59c56e5 100644
--- a/scss/src/_speedups.c
+++ b/scss/src/_speedups.c
@@ -6,7 +6,7 @@
 * https://github.com/Kronuz/pyScss
 *
 * MIT license (http://www.opensource.org/licenses/mit-license.php)
-* Copyright (c) 2011 German M. Bravo (Kronuz), All rights reserved.
+* Copyright (c) 2011, 2013 German M. Bravo (Kronuz), All rights reserved.
 */
 #include <Python.h>
 #include "block_locator.h"
diff --git a/scss/src/scanner.c b/scss/src/scanner.c
index ca74ba7..a196aa5 100644
--- a/scss/src/scanner.c
+++ b/scss/src/scanner.c
@@ -6,7 +6,7 @@
 * https://github.com/Kronuz/pyScss
 *
 * MIT license (http://www.opensource.org/licenses/mit-license.php)
-* Copyright (c) 2011 German M. Bravo (Kronuz), All rights reserved.
+* Copyright (c) 2011, 2013 German M. Bravo (Kronuz), All rights reserved.
 */
 #include <Python.h>
 
@@ -159,10 +159,10 @@ Pattern_finalize(void) {
 static long
 _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz)
 {
-	Token best_token, *p_token;
+	Token best_pat, *last_read_token;
 	Restriction *p_restriction;
 	Pattern *regex;
-	int j, k, max, skip;
+	int j, k, max, ignore;
 	size_t len;
 	char *aux;
 
@@ -172,7 +172,7 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz)
 
 	while (1) {
 		regex = NULL;
-		best_token.regex = NULL;
+		best_pat.regex = NULL;
 		/* Search the patterns for a match, with earlier
 		   tokens in the list having preference */
 		for (j = 0; j < Pattern_patterns_sz; j++) {
@@ -181,24 +181,24 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz)
 				fprintf(stderr, "\tTrying %s: %s at pos %d -> %s\n", repr(regex->tok), repr(regex->expr), self->pos, repr(self->input));
 			#endif
 			/* First check to see if we're restricting to this token */
-			skip = restrictions_sz;
-			if (skip) {
+			ignore = restrictions_sz;
+			if (ignore) {
 				max = (restrictions_sz > self->ignore_sz) ? restrictions_sz : self->ignore_sz;
 				for (k = 0; k < max; k++) {
 					if (k < restrictions_sz && strcmp(regex->tok, restrictions[k].tok) == 0) {
-						skip = 0;
+						ignore = 0;
 						break;
 					}
 					if (k < self->ignore_sz && regex == self->ignore[k]) {
-						skip = 0;
+						ignore = 0;
 						break;
 					}
 				}
-				if (skip) {
-					continue;
+				if (ignore) {
 					#ifdef DEBUG
 						fprintf(stderr, "\tSkipping %s!\n", repr(regex->tok));
 					#endif
+					continue;
 				}
 			}
 			if (Pattern_match(
@@ -206,7 +206,7 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz)
 				self->input,
 				self->input_sz,
 				self->pos,
-				&best_token
+				&best_pat
 			)) {
 				#ifdef DEBUG
 					fprintf(stderr, "Match OK! %s: %s at pos %d\n", repr(regex->tok), repr(regex->expr), self->pos);
@@ -215,7 +215,7 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz)
 			}
 		}
 		/* If we didn't find anything, raise an error */
-		if (best_token.regex == NULL) {
+		if (best_pat.regex == NULL) {
 			if (restrictions_sz) {
 				sprintf(self->exc, "SyntaxError[@ char %d: %s found while trying to find one of the restricted tokens: ", self->pos, (regex == NULL) ? "???" : repr(regex->tok));
 				aux = self->exc + strlen(self->exc);
@@ -234,55 +234,52 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz)
 			sprintf(self->exc, "SyntaxError[@ char %d: Bad token: %s]", self->pos, (regex == NULL) ? "???" : repr(regex->tok));
 			return SCANNER_EXC_BAD_TOKEN;
 		}
-		/* If we found something that isn't to be ignored, return it */
-		skip = 0;
+
+		ignore = 0;  /* Should this token be ignored? */
 		for (k = 0; k < self->ignore_sz; k++) {
-			if (best_token.regex == self->ignore[k]) {
-				/* This token should be ignored... */
-				self->pos += best_token.string_sz;
-				skip = 1;
+			if (best_pat.regex == self->ignore[k]) {
+				ignore = 1;
 				break;
 			}
 		}
-		if (!skip) {
-			break;
-		}
-	}
-	if (best_token.regex) {
-		self->pos = (int)(best_token.string - self->input + best_token.string_sz);
-		/* Only add this token if it's not in the list (to prevent looping) */
-		p_token = &self->tokens[self->tokens_sz - 1];
-		if (self->tokens_sz == 0 ||
-			p_token->regex != best_token.regex ||
-			p_token->string != best_token.string ||
-			p_token->string_sz != best_token.string_sz
-		) {
-			if (self->tokens_sz >= self->tokens_bsz) {
-				/* Needs to expand block */
-				self->tokens_bsz = self->tokens_bsz + BLOCK_SIZE_PATTERNS;
-				PyMem_Resize(self->tokens, Token, self->tokens_bsz);
-				PyMem_Resize(self->restrictions, Restriction, self->tokens_bsz);
-			}
-			memcpy(&self->tokens[self->tokens_sz], &best_token, sizeof(Token));
-			p_restriction = &self->restrictions[self->tokens_sz];
-			if (restrictions_sz) {
-				p_restriction->patterns = PyMem_New(Pattern *, restrictions_sz);
-				p_restriction->patterns_sz = 0;
-				for (j = 0; j < restrictions_sz; j++) {
-					regex = Pattern_regex(restrictions[j].tok, restrictions[j].expr);
-					if (regex) {
-						p_restriction->patterns[p_restriction->patterns_sz++] = regex;
+		self->pos += best_pat.string_sz;
+
+		/* If we found something that isn't to be ignored, return it */
+		if (!ignore) {			
+			/* Only add this token if it's not in the list (to prevent looping) */
+			last_read_token = &self->tokens[self->tokens_sz - 1];
+			if (self->tokens_sz == 0 ||
+				last_read_token->regex != best_pat.regex ||
+				last_read_token->string != best_pat.string ||
+				last_read_token->string_sz != best_pat.string_sz
+			) {
+				if (self->tokens_sz >= self->tokens_bsz) {
+					/* Needs to expand blocks */
+					self->tokens_bsz = self->tokens_bsz + BLOCK_SIZE_PATTERNS;
+					PyMem_Resize(self->tokens, Token, self->tokens_bsz);
+					PyMem_Resize(self->restrictions, Restriction, self->tokens_bsz);
+				}
+				memcpy(&self->tokens[self->tokens_sz], &best_pat, sizeof(Token));
+				p_restriction = &self->restrictions[self->tokens_sz];
+				if (restrictions_sz) {
+					p_restriction->patterns = PyMem_New(Pattern *, restrictions_sz);
+					p_restriction->patterns_sz = 0;
+					for (j = 0; j < restrictions_sz; j++) {
+						regex = Pattern_regex(restrictions[j].tok, restrictions[j].expr);
+						if (regex) {
+							p_restriction->patterns[p_restriction->patterns_sz++] = regex;
+						}
 					}
+				} else {
+					p_restriction->patterns = NULL;
+					p_restriction->patterns_sz = 0;
 				}
-			} else {
-				p_restriction->patterns = NULL;
-				p_restriction->patterns_sz = 0;
+				self->tokens_sz++;
+				return 1;
 			}
-			self->tokens_sz++;
-			return 1;
+			return 0;
 		}
 	}
-	return 0;
 }
 
 
diff --git a/scss/src/scanner.py b/scss/src/scanner.py
index 97c0b34..d148b30 100755
--- a/scss/src/scanner.py
+++ b/scss/src/scanner.py
@@ -84,132 +84,7 @@ PATTERNS = [
 # Parser
 DEBUG = False
 
-
-class NoMoreTokens(Exception):
-    """
-    Another exception object, for when we run out of tokens
-    """
-    pass
-
-
-class Scanner(object):
-    def __init__(self, patterns, ignore, input=None):
-        """
-        Patterns is [(terminal,regex)...]
-        Ignore is [terminal,...];
-        Input is a string
-        """
-        self.reset(input)
-        self.ignore = ignore
-        # The stored patterns are a pair (compiled regex,source
-        # regex).  If the patterns variable passed in to the
-        # constructor is None, we assume that the class already has a
-        # proper .patterns list constructed
-        if patterns is not None:
-            self.patterns = []
-            for k, r in patterns:
-                self.patterns.append((k, re.compile(r)))
-
-    def reset(self, input):
-        self.tokens = []
-        self.restrictions = []
-        self.input = input
-        self.pos = 0
-
-    def __repr__(self):
-        """
-        Print the last 10 tokens that have been scanned in
-        """
-        output = ''
-        for t in self.tokens[-10:]:
-            output = "%s\n  (@%s)  %s  =  %s" % (output, t[0], t[2], repr(t[3]))
-        return output
-
-    def _scan(self, restrict):
-        """
-        Should scan another token and add it to the list, self.tokens,
-        and add the restriction to self.restrictions
-        """
-        # Keep looking for a token, ignoring any in self.ignore
-        token = None
-        while True:
-            tok = None
-            best_pat = None
-            # Search the patterns for a match, with earlier
-            # tokens in the list having preference
-            best_pat_len = 0
-            for tok, regex in self.patterns:
-                if DEBUG:
-                    print("\tTrying %s: %s at pos %d -> %s" % (repr(tok), repr(regex.pattern), self.pos, repr(self.input)))
-                # First check to see if we're restricting to this token
-                if restrict and tok not in restrict and tok not in self.ignore:
-                    if DEBUG:
-                        print "\tSkipping %s!" % repr(tok)
-                    continue
-                m = regex.match(self.input, self.pos)
-                if m:
-                    # We got a match
-                    best_pat = tok
-                    best_pat_len = len(m.group(0))
-                    if DEBUG:
-                        print("Match OK! %s: %s at pos %d" % (repr(tok), repr(regex.pattern), self.pos))
-                    break
-
-            # If we didn't find anything, raise an error
-            if best_pat is None:
-                msg = "Bad token: %s" % ("???" if tok is None else repr(tok),)
-                if restrict:
-                    msg = "%s found while trying to find one of the restricted tokens: %s" % ("???" if tok is None else repr(tok), ", ".join(repr(r) for r in restrict))
-                raise SyntaxError("SyntaxError[@ char %s: %s]" % (repr(self.pos), msg))
-
-            # If we found something that isn't to be ignored, return it
-            if best_pat in self.ignore:
-                # This token should be ignored...
-                self.pos += best_pat_len
-            else:
-                end_pos = self.pos + best_pat_len
-                # Create a token with this data
-                token = (
-                    self.pos,
-                    end_pos,
-                    best_pat,
-                    self.input[self.pos:end_pos]
-                )
-                break
-        if token is not None:
-            self.pos = token[1]
-            # Only add this token if it's not in the list
-            # (to prevent looping)
-            if not self.tokens or token != self.tokens[-1]:
-                self.tokens.append(token)
-                self.restrictions.append(restrict)
-                return 1
-        return 0
-
-    def token(self, i, restrict=None):
-        """
-        Get the i'th token, and if i is one past the end, then scan
-        for another token; restrict is a list of tokens that
-        are allowed, or 0 for any token.
-        """
-        tokens_len = len(self.tokens)
-        if i == tokens_len:  # We are at the end, get the next...
-            tokens_len += self._scan(restrict)
-        elif i >= 0 and i < tokens_len:
-            if restrict and self.restrictions[i] and restrict > self.restrictions[i]:
-                raise NotImplementedError("Unimplemented: restriction set changed")
-        if i >= 0 and i < tokens_len:
-            return self.tokens[i]
-        raise NoMoreTokens()
-
-    def rewind(self, i):
-        tokens_len = len(self.tokens)
-        if i <= tokens_len:
-            token = self.tokens[i]
-            self.tokens = self.tokens[:i]
-            self.restrictions = self.restrictions[:i]
-            self.pos = token[0]
-
+from yapps.runtime import Scanner
 
 class _Scanner_a(Scanner):
     patterns = None
diff --git a/scss/src/yapps/runtime.py b/scss/src/yapps/runtime.py
index e23210a..6920fdb 100644
--- a/scss/src/yapps/runtime.py
+++ b/scss/src/yapps/runtime.py
@@ -21,7 +21,6 @@ import re
 import sys
 
 DEBUG = False
-MIN_WINDOW = 4096
 # File lookup window
 
 
@@ -71,9 +70,6 @@ class Token(object):
         return output
 
 
-in_name = 0
-
-
 class Scanner(object):
     """Yapps scanner.
 
@@ -85,9 +81,11 @@ class Scanner(object):
     restriction (the set is always the full set of tokens).
 
     """
+    MIN_WINDOW = 4096
+    in_name = 0
 
     def __init__(self, patterns, ignore, input="",
-            file=None, filename=None, stacked=False):
+            file=None, filename=None):
         """Initialize the scanner.
 
         Parameters:
@@ -106,13 +104,11 @@ class Scanner(object):
         """
 
         if not filename:
-            global in_name
-            filename = "<f.%d>" % in_name
-            in_name += 1
+            filename = "<f.%d>" % self.__class__.in_name
+            self.__class__.in_name += 1
 
         self.reset(input, file, filename)
         self.ignore = ignore
-        self.stacked = stacked
 
         if patterns is not None:
             # Compile the regex strings into regex objects
@@ -216,19 +212,19 @@ class Scanner(object):
         """Get more input if possible."""
         if not self.file:
             return
-        if len(self.input) - self.pos >= MIN_WINDOW:
+        if len(self.input) - self.pos >= self.MIN_WINDOW:
             return
 
-        data = self.file.read(MIN_WINDOW)
+        data = self.file.read(self.MIN_WINDOW)
         if data is None or data == "":
             self.file = None
 
         # Drop bytes from the start, if necessary.
-        if self.pos > 2 * MIN_WINDOW:
-            self.del_pos += MIN_WINDOW
-            self.del_line += self.input[:MIN_WINDOW].count("\n")
-            self.pos -= MIN_WINDOW
-            self.input = self.input[MIN_WINDOW:] + data
+        if self.pos > 2 * self.MIN_WINDOW:
+            self.del_pos += self.MIN_WINDOW
+            self.del_line += self.input[:self.MIN_WINDOW].count("\n")
+            self.pos -= self.MIN_WINDOW
+            self.input = self.input[self.MIN_WINDOW:] + data
         else:
             self.input = self.input + data
 
@@ -245,21 +241,17 @@ class Scanner(object):
         Should scan another token and add it to the list, self.tokens,
         and add the restriction to self.restrictions
         """
+        token = None
         # Keep looking for a token, ignoring any in self.ignore
         while True:
             tok = None
 
             self.grab_input()
 
-            # special handling for end-of-file
-            if self.stacked and self.pos == len(self.input):
-                raise StopIteration
-
             # Search the patterns for the longest match, with earlier
             # tokens in the list having preference
-            best_match = -1
+            best_pat_len = -1
             best_pat = None
-            best_m = None
             for tok, regex in self.patterns:
                 if DEBUG:
                     print("\tTrying %s: %s at pos %d -> %s" % (repr(tok), repr(regex.pattern), self.pos, repr(self.input)))
@@ -269,56 +261,44 @@ class Scanner(object):
                         print "\tSkipping %s!" % repr(tok)
                     continue
                 m = regex.match(self.input, self.pos)
-                if m and m.end() - m.start() > best_match:
+                if m and m.end() - m.start() > best_pat_len:
                     # We got a match that's better than the previous one
                     best_pat = tok
-                    best_match = m.end() - m.start()
-                    best_m = m
+                    best_pat_len = m.end() - m.start()
                     if DEBUG:
                         print("Match OK! %s: %s at pos %d" % (repr(tok), repr(regex.pattern), self.pos))
+                    break
 
             # If we didn't find anything, raise an error
-            if best_pat is None or best_match < 0:
+            if best_pat is None or best_pat_len < 0:
                 msg = "Bad token: %s" % ("???" if tok is None else repr(tok),)
                 if restrict:
                     msg = "%s found while trying to find one of the restricted tokens: %s" % ("???" if tok is None else repr(tok), ", ".join(repr(r) for r in restrict))
                 raise SyntaxError(self.get_pos(), msg, context=context)
 
-            ignore = best_pat in self.ignore
-            end_pos = self.pos + best_match
-            value = self.input[self.pos:end_pos]
+            ignore = best_pat in self.ignore  # Should this token be ignored?
+            start_pos = self.pos
+            end_pos = start_pos + best_pat_len
+            self.pos = end_pos
+
+            # If we found something that isn't to be ignored, return it
             if not ignore:
+                value = self.input[start_pos:end_pos]
                 # token = Token(type=best_pat, value=value, pos=self.get_pos())
                 token = (
-                    self.pos,
+                    start_pos,
                     end_pos,
                     best_pat,
                     value,
                 )
-            self.pos = end_pos
-
-            npos = value.rfind("\n")
-            if npos > -1:
-                self.col = best_match - npos
-                self.line += value.count('\n')
-            else:
-                self.col += best_match
-
-            # If we found something that isn't to be ignored, return it
-            if not ignore:
                 # print repr(token)
                 if not self.tokens or token != self.last_read_token:
-                    # Only add this token if it's not in the list
-                    # (to prevent looping)
+                    # Only add this token if it's not in the list (to prevent looping)
                     self.last_read_token = token
                     self.tokens.append(token)
                     self.restrictions.append(restrict)
                     return 1
                 return 0
-            else:
-                ignore = self.ignore[best_pat]
-                if ignore:
-                    ignore(self, best_m)
 
     def token(self, i, restrict=None, **kwargs):
         """
@@ -335,7 +315,15 @@ class Scanner(object):
                 raise NotImplementedError("Unimplemented: restriction set changed")
         if i >= 0 and i < tokens_len:
             return self.tokens[i]
-        raise NoMoreTokens
+        raise NoMoreTokens()
+
+    def rewind(self, i):
+        tokens_len = len(self.tokens)
+        if i <= tokens_len:
+            token = self.tokens[i]
+            self.tokens = self.tokens[:i]
+            self.restrictions = self.restrictions[:i]
+            self.pos = token[0]
 
 
 class Parser(object):
author	German M. Bravo <german.mb@deipi.com>	2013-08-15 17:07:27 -0500
committer	German M. Bravo <german.mb@deipi.com>	2013-08-16 10:16:22 -0500
commit	7f8f00988e6c58d9d49d6e4c973ee177345b46b0 (patch)
tree	2351271978f311f9b0c07ff6ce9a20d6774961d6
parent	0416b034d8e80e134b307515ce2f755abcc691eb (diff)
download	pyscss-grammar-test.tar.gz