summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: German M. Bravo <german.mb@deipi.com> 2013-08-15 17:07:27 -0500
committer: German M. Bravo <german.mb@deipi.com> 2013-08-16 10:16:22 -0500
commit: 7f8f00988e6c58d9d49d6e4c973ee177345b46b0 (patch)
tree: 2351271978f311f9b0c07ff6ce9a20d6774961d6
parent: 0416b034d8e80e134b307515ce2f755abcc691eb (diff)
download: pyscss-grammar-test.tar.gz
Scanner updated [grammar-test]
-rw-r--r-- scss/src/_speedups.c 2
-rw-r--r-- scss/src/scanner.c 101
-rwxr-xr-x scss/src/scanner.py 127
-rw-r--r-- scss/src/yapps/runtime.py 84
4 files changed, 87 insertions, 227 deletions
diff --git a/scss/src/_speedups.c b/scss/src/_speedups.c
index b4b990c..59c56e5 100644
--- a/scss/src/_speedups.c
+++ b/scss/src/_speedups.c
@@ -6,7 +6,7 @@
* https://github.com/Kronuz/pyScss
*
* MIT license (http://www.opensource.org/licenses/mit-license.php)
-* Copyright (c) 2011 German M. Bravo (Kronuz), All rights reserved.
+* Copyright (c) 2011, 2013 German M. Bravo (Kronuz), All rights reserved.
*/
#include <Python.h>
#include "block_locator.h"
diff --git a/scss/src/scanner.c b/scss/src/scanner.c
index ca74ba7..a196aa5 100644
--- a/scss/src/scanner.c
+++ b/scss/src/scanner.c
@@ -6,7 +6,7 @@
* https://github.com/Kronuz/pyScss
*
* MIT license (http://www.opensource.org/licenses/mit-license.php)
-* Copyright (c) 2011 German M. Bravo (Kronuz), All rights reserved.
+* Copyright (c) 2011, 2013 German M. Bravo (Kronuz), All rights reserved.
*/
#include <Python.h>
@@ -159,10 +159,10 @@ Pattern_finalize(void) {
static long
_Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz)
{
- Token best_token, *p_token;
+ Token best_pat, *last_read_token;
Restriction *p_restriction;
Pattern *regex;
- int j, k, max, skip;
+ int j, k, max, ignore;
size_t len;
char *aux;
@@ -172,7 +172,7 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz)
while (1) {
regex = NULL;
- best_token.regex = NULL;
+ best_pat.regex = NULL;
/* Search the patterns for a match, with earlier
tokens in the list having preference */
for (j = 0; j < Pattern_patterns_sz; j++) {
@@ -181,24 +181,24 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz)
fprintf(stderr, "\tTrying %s: %s at pos %d -> %s\n", repr(regex->tok), repr(regex->expr), self->pos, repr(self->input));
#endif
/* First check to see if we're restricting to this token */
- skip = restrictions_sz;
- if (skip) {
+ ignore = restrictions_sz;
+ if (ignore) {
max = (restrictions_sz > self->ignore_sz) ? restrictions_sz : self->ignore_sz;
for (k = 0; k < max; k++) {
if (k < restrictions_sz && strcmp(regex->tok, restrictions[k].tok) == 0) {
- skip = 0;
+ ignore = 0;
break;
}
if (k < self->ignore_sz && regex == self->ignore[k]) {
- skip = 0;
+ ignore = 0;
break;
}
}
- if (skip) {
- continue;
+ if (ignore) {
#ifdef DEBUG
fprintf(stderr, "\tSkipping %s!\n", repr(regex->tok));
#endif
+ continue;
}
}
if (Pattern_match(
@@ -206,7 +206,7 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz)
self->input,
self->input_sz,
self->pos,
- &best_token
+ &best_pat
)) {
#ifdef DEBUG
fprintf(stderr, "Match OK! %s: %s at pos %d\n", repr(regex->tok), repr(regex->expr), self->pos);
@@ -215,7 +215,7 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz)
}
}
/* If we didn't find anything, raise an error */
- if (best_token.regex == NULL) {
+ if (best_pat.regex == NULL) {
if (restrictions_sz) {
sprintf(self->exc, "SyntaxError[@ char %d: %s found while trying to find one of the restricted tokens: ", self->pos, (regex == NULL) ? "???" : repr(regex->tok));
aux = self->exc + strlen(self->exc);
@@ -234,55 +234,52 @@ _Scanner_scan(Scanner *self, Pattern *restrictions, int restrictions_sz)
sprintf(self->exc, "SyntaxError[@ char %d: Bad token: %s]", self->pos, (regex == NULL) ? "???" : repr(regex->tok));
return SCANNER_EXC_BAD_TOKEN;
}
- /* If we found something that isn't to be ignored, return it */
- skip = 0;
+
+ ignore = 0; /* Should this token be ignored? */
for (k = 0; k < self->ignore_sz; k++) {
- if (best_token.regex == self->ignore[k]) {
- /* This token should be ignored... */
- self->pos += best_token.string_sz;
- skip = 1;
+ if (best_pat.regex == self->ignore[k]) {
+ ignore = 1;
break;
}
}
- if (!skip) {
- break;
- }
- }
- if (best_token.regex) {
- self->pos = (int)(best_token.string - self->input + best_token.string_sz);
- /* Only add this token if it's not in the list (to prevent looping) */
- p_token = &self->tokens[self->tokens_sz - 1];
- if (self->tokens_sz == 0 ||
- p_token->regex != best_token.regex ||
- p_token->string != best_token.string ||
- p_token->string_sz != best_token.string_sz
- ) {
- if (self->tokens_sz >= self->tokens_bsz) {
- /* Needs to expand block */
- self->tokens_bsz = self->tokens_bsz + BLOCK_SIZE_PATTERNS;
- PyMem_Resize(self->tokens, Token, self->tokens_bsz);
- PyMem_Resize(self->restrictions, Restriction, self->tokens_bsz);
- }
- memcpy(&self->tokens[self->tokens_sz], &best_token, sizeof(Token));
- p_restriction = &self->restrictions[self->tokens_sz];
- if (restrictions_sz) {
- p_restriction->patterns = PyMem_New(Pattern *, restrictions_sz);
- p_restriction->patterns_sz = 0;
- for (j = 0; j < restrictions_sz; j++) {
- regex = Pattern_regex(restrictions[j].tok, restrictions[j].expr);
- if (regex) {
- p_restriction->patterns[p_restriction->patterns_sz++] = regex;
+ self->pos += best_pat.string_sz;
+
+ /* If we found something that isn't to be ignored, return it */
+ if (!ignore) {
+ /* Only add this token if it's not in the list (to prevent looping) */
+ last_read_token = &self->tokens[self->tokens_sz - 1];
+ if (self->tokens_sz == 0 ||
+ last_read_token->regex != best_pat.regex ||
+ last_read_token->string != best_pat.string ||
+ last_read_token->string_sz != best_pat.string_sz
+ ) {
+ if (self->tokens_sz >= self->tokens_bsz) {
+ /* Needs to expand blocks */
+ self->tokens_bsz = self->tokens_bsz + BLOCK_SIZE_PATTERNS;
+ PyMem_Resize(self->tokens, Token, self->tokens_bsz);
+ PyMem_Resize(self->restrictions, Restriction, self->tokens_bsz);
+ }
+ memcpy(&self->tokens[self->tokens_sz], &best_pat, sizeof(Token));
+ p_restriction = &self->restrictions[self->tokens_sz];
+ if (restrictions_sz) {
+ p_restriction->patterns = PyMem_New(Pattern *, restrictions_sz);
+ p_restriction->patterns_sz = 0;
+ for (j = 0; j < restrictions_sz; j++) {
+ regex = Pattern_regex(restrictions[j].tok, restrictions[j].expr);
+ if (regex) {
+ p_restriction->patterns[p_restriction->patterns_sz++] = regex;
+ }
}
+ } else {
+ p_restriction->patterns = NULL;
+ p_restriction->patterns_sz = 0;
}
- } else {
- p_restriction->patterns = NULL;
- p_restriction->patterns_sz = 0;
+ self->tokens_sz++;
+ return 1;
}
- self->tokens_sz++;
- return 1;
+ return 0;
}
}
- return 0;
}
diff --git a/scss/src/scanner.py b/scss/src/scanner.py
index 97c0b34..d148b30 100755
--- a/scss/src/scanner.py
+++ b/scss/src/scanner.py
@@ -84,132 +84,7 @@ PATTERNS = [
# Parser
DEBUG = False
-
-class NoMoreTokens(Exception):
- """
- Another exception object, for when we run out of tokens
- """
- pass
-
-
-class Scanner(object):
- def __init__(self, patterns, ignore, input=None):
- """
- Patterns is [(terminal,regex)...]
- Ignore is [terminal,...];
- Input is a string
- """
- self.reset(input)
- self.ignore = ignore
- # The stored patterns are a pair (compiled regex,source
- # regex). If the patterns variable passed in to the
- # constructor is None, we assume that the class already has a
- # proper .patterns list constructed
- if patterns is not None:
- self.patterns = []
- for k, r in patterns:
- self.patterns.append((k, re.compile(r)))
-
- def reset(self, input):
- self.tokens = []
- self.restrictions = []
- self.input = input
- self.pos = 0
-
- def __repr__(self):
- """
- Print the last 10 tokens that have been scanned in
- """
- output = ''
- for t in self.tokens[-10:]:
- output = "%s\n (@%s) %s = %s" % (output, t[0], t[2], repr(t[3]))
- return output
-
- def _scan(self, restrict):
- """
- Should scan another token and add it to the list, self.tokens,
- and add the restriction to self.restrictions
- """
- # Keep looking for a token, ignoring any in self.ignore
- token = None
- while True:
- tok = None
- best_pat = None
- # Search the patterns for a match, with earlier
- # tokens in the list having preference
- best_pat_len = 0
- for tok, regex in self.patterns:
- if DEBUG:
- print("\tTrying %s: %s at pos %d -> %s" % (repr(tok), repr(regex.pattern), self.pos, repr(self.input)))
- # First check to see if we're restricting to this token
- if restrict and tok not in restrict and tok not in self.ignore:
- if DEBUG:
- print "\tSkipping %s!" % repr(tok)
- continue
- m = regex.match(self.input, self.pos)
- if m:
- # We got a match
- best_pat = tok
- best_pat_len = len(m.group(0))
- if DEBUG:
- print("Match OK! %s: %s at pos %d" % (repr(tok), repr(regex.pattern), self.pos))
- break
-
- # If we didn't find anything, raise an error
- if best_pat is None:
- msg = "Bad token: %s" % ("???" if tok is None else repr(tok),)
- if restrict:
- msg = "%s found while trying to find one of the restricted tokens: %s" % ("???" if tok is None else repr(tok), ", ".join(repr(r) for r in restrict))
- raise SyntaxError("SyntaxError[@ char %s: %s]" % (repr(self.pos), msg))
-
- # If we found something that isn't to be ignored, return it
- if best_pat in self.ignore:
- # This token should be ignored...
- self.pos += best_pat_len
- else:
- end_pos = self.pos + best_pat_len
- # Create a token with this data
- token = (
- self.pos,
- end_pos,
- best_pat,
- self.input[self.pos:end_pos]
- )
- break
- if token is not None:
- self.pos = token[1]
- # Only add this token if it's not in the list
- # (to prevent looping)
- if not self.tokens or token != self.tokens[-1]:
- self.tokens.append(token)
- self.restrictions.append(restrict)
- return 1
- return 0
-
- def token(self, i, restrict=None):
- """
- Get the i'th token, and if i is one past the end, then scan
- for another token; restrict is a list of tokens that
- are allowed, or 0 for any token.
- """
- tokens_len = len(self.tokens)
- if i == tokens_len: # We are at the end, get the next...
- tokens_len += self._scan(restrict)
- elif i >= 0 and i < tokens_len:
- if restrict and self.restrictions[i] and restrict > self.restrictions[i]:
- raise NotImplementedError("Unimplemented: restriction set changed")
- if i >= 0 and i < tokens_len:
- return self.tokens[i]
- raise NoMoreTokens()
-
- def rewind(self, i):
- tokens_len = len(self.tokens)
- if i <= tokens_len:
- token = self.tokens[i]
- self.tokens = self.tokens[:i]
- self.restrictions = self.restrictions[:i]
- self.pos = token[0]
-
+from yapps.runtime import Scanner
class _Scanner_a(Scanner):
patterns = None
diff --git a/scss/src/yapps/runtime.py b/scss/src/yapps/runtime.py
index e23210a..6920fdb 100644
--- a/scss/src/yapps/runtime.py
+++ b/scss/src/yapps/runtime.py
@@ -21,7 +21,6 @@ import re
import sys
DEBUG = False
-MIN_WINDOW = 4096
# File lookup window
@@ -71,9 +70,6 @@ class Token(object):
return output
-in_name = 0
-
-
class Scanner(object):
"""Yapps scanner.
@@ -85,9 +81,11 @@ class Scanner(object):
restriction (the set is always the full set of tokens).
"""
+ MIN_WINDOW = 4096
+ in_name = 0
def __init__(self, patterns, ignore, input="",
- file=None, filename=None, stacked=False):
+ file=None, filename=None):
"""Initialize the scanner.
Parameters:
@@ -106,13 +104,11 @@ class Scanner(object):
"""
if not filename:
- global in_name
- filename = "<f.%d>" % in_name
- in_name += 1
+ filename = "<f.%d>" % self.__class__.in_name
+ self.__class__.in_name += 1
self.reset(input, file, filename)
self.ignore = ignore
- self.stacked = stacked
if patterns is not None:
# Compile the regex strings into regex objects
@@ -216,19 +212,19 @@ class Scanner(object):
"""Get more input if possible."""
if not self.file:
return
- if len(self.input) - self.pos >= MIN_WINDOW:
+ if len(self.input) - self.pos >= self.MIN_WINDOW:
return
- data = self.file.read(MIN_WINDOW)
+ data = self.file.read(self.MIN_WINDOW)
if data is None or data == "":
self.file = None
# Drop bytes from the start, if necessary.
- if self.pos > 2 * MIN_WINDOW:
- self.del_pos += MIN_WINDOW
- self.del_line += self.input[:MIN_WINDOW].count("\n")
- self.pos -= MIN_WINDOW
- self.input = self.input[MIN_WINDOW:] + data
+ if self.pos > 2 * self.MIN_WINDOW:
+ self.del_pos += self.MIN_WINDOW
+ self.del_line += self.input[:self.MIN_WINDOW].count("\n")
+ self.pos -= self.MIN_WINDOW
+ self.input = self.input[self.MIN_WINDOW:] + data
else:
self.input = self.input + data
@@ -245,21 +241,17 @@ class Scanner(object):
Should scan another token and add it to the list, self.tokens,
and add the restriction to self.restrictions
"""
+ token = None
# Keep looking for a token, ignoring any in self.ignore
while True:
tok = None
self.grab_input()
- # special handling for end-of-file
- if self.stacked and self.pos == len(self.input):
- raise StopIteration
-
# Search the patterns for the longest match, with earlier
# tokens in the list having preference
- best_match = -1
+ best_pat_len = -1
best_pat = None
- best_m = None
for tok, regex in self.patterns:
if DEBUG:
print("\tTrying %s: %s at pos %d -> %s" % (repr(tok), repr(regex.pattern), self.pos, repr(self.input)))
@@ -269,56 +261,44 @@ class Scanner(object):
print "\tSkipping %s!" % repr(tok)
continue
m = regex.match(self.input, self.pos)
- if m and m.end() - m.start() > best_match:
+ if m and m.end() - m.start() > best_pat_len:
# We got a match that's better than the previous one
best_pat = tok
- best_match = m.end() - m.start()
- best_m = m
+ best_pat_len = m.end() - m.start()
if DEBUG:
print("Match OK! %s: %s at pos %d" % (repr(tok), repr(regex.pattern), self.pos))
+ break
# If we didn't find anything, raise an error
- if best_pat is None or best_match < 0:
+ if best_pat is None or best_pat_len < 0:
msg = "Bad token: %s" % ("???" if tok is None else repr(tok),)
if restrict:
msg = "%s found while trying to find one of the restricted tokens: %s" % ("???" if tok is None else repr(tok), ", ".join(repr(r) for r in restrict))
raise SyntaxError(self.get_pos(), msg, context=context)
- ignore = best_pat in self.ignore
- end_pos = self.pos + best_match
- value = self.input[self.pos:end_pos]
+ ignore = best_pat in self.ignore # Should this token be ignored?
+ start_pos = self.pos
+ end_pos = start_pos + best_pat_len
+ self.pos = end_pos
+
+ # If we found something that isn't to be ignored, return it
if not ignore:
+ value = self.input[start_pos:end_pos]
# token = Token(type=best_pat, value=value, pos=self.get_pos())
token = (
- self.pos,
+ start_pos,
end_pos,
best_pat,
value,
)
- self.pos = end_pos
-
- npos = value.rfind("\n")
- if npos > -1:
- self.col = best_match - npos
- self.line += value.count('\n')
- else:
- self.col += best_match
-
- # If we found something that isn't to be ignored, return it
- if not ignore:
# print repr(token)
if not self.tokens or token != self.last_read_token:
- # Only add this token if it's not in the list
- # (to prevent looping)
+ # Only add this token if it's not in the list (to prevent looping)
self.last_read_token = token
self.tokens.append(token)
self.restrictions.append(restrict)
return 1
return 0
- else:
- ignore = self.ignore[best_pat]
- if ignore:
- ignore(self, best_m)
def token(self, i, restrict=None, **kwargs):
"""
@@ -335,7 +315,15 @@ class Scanner(object):
raise NotImplementedError("Unimplemented: restriction set changed")
if i >= 0 and i < tokens_len:
return self.tokens[i]
- raise NoMoreTokens
+ raise NoMoreTokens()
+
+ def rewind(self, i):
+ tokens_len = len(self.tokens)
+ if i <= tokens_len:
+ token = self.tokens[i]
+ self.tokens = self.tokens[:i]
+ self.restrictions = self.restrictions[:i]
+ self.pos = token[0]
class Parser(object):