summaryrefslogtreecommitdiff
path: root/scss/grammar/scanner.py
diff options
context:
space:
mode:
Diffstat (limited to 'scss/grammar/scanner.py')
-rw-r--r--scss/grammar/scanner.py274
1 files changed, 274 insertions, 0 deletions
diff --git a/scss/grammar/scanner.py b/scss/grammar/scanner.py
new file mode 100644
index 0000000..d466254
--- /dev/null
+++ b/scss/grammar/scanner.py
@@ -0,0 +1,274 @@
+"""Pure-Python scanner and parser, used if the C module is not available."""
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from collections import deque
+
+import re
+
+DEBUG = False
+
+# TODO copied from __init__
+_blocks_re = re.compile(r'[{},;()\'"\n]')
+
+
+try:
+ from ._scanner import locate_blocks
+except ImportError:
+ def locate_blocks(codestr):
+ """
+ For processing CSS like strings.
+
+ Either returns all selectors (that can be "smart" multi-lined, as
+ long as it's joined by `,`, or enclosed in `(` and `)`) with its code block
+ (the one between `{` and `}`, which can be nested), or the "lose" code
+ (properties) that doesn't have any blocks.
+ """
+ lineno = 1
+
+ par = 0
+ instr = None
+ depth = 0
+ skip = False
+ i = init = lose = 0
+ start = end = None
+ lineno_stack = deque()
+
+ for m in _blocks_re.finditer(codestr):
+ i = m.start(0)
+ c = codestr[i]
+ if c == '\n':
+ lineno += 1
+
+ if instr is not None:
+ if c == instr:
+ instr = None # A string ends (FIXME: needs to accept escaped characters)
+ elif c in ('"', "'"):
+ instr = c # A string starts
+ elif c == '(': # parenthesis begins:
+ par += 1
+ elif c == ')': # parenthesis ends:
+ par -= 1
+ elif not par and not instr:
+ if c == '{': # block begins:
+ if depth == 0:
+ if i > 0 and codestr[i - 1] == '#': # Do not process #{...} as blocks!
+ skip = True
+ else:
+ lineno_stack.append(lineno)
+ start = i
+ if lose < init:
+ _property = codestr[lose:init].strip()
+ if _property:
+ yield lineno, _property, None
+ lose = init
+ depth += 1
+ elif c == '}': # block ends:
+ if depth <= 0:
+ raise SyntaxError("Unexpected closing brace on line {0}".format(lineno))
+ else:
+ depth -= 1
+ if depth == 0:
+ if not skip:
+ end = i
+ _selectors = codestr[init:start].strip()
+ _codestr = codestr[start + 1:end].strip()
+ if _selectors:
+ yield lineno_stack.pop(), _selectors, _codestr
+ init = lose = end + 1
+ skip = False
+ elif depth == 0:
+ if c == ';': # End of property (or block):
+ init = i
+ if lose < init:
+ _property = codestr[lose:init].strip()
+ if _property:
+ yield lineno, _property, None
+ init = lose = i + 1
+ if depth > 0:
+ if not skip:
+ _selectors = codestr[init:start].strip()
+ _codestr = codestr[start + 1:].strip()
+ if _selectors:
+ yield lineno, _selectors, _codestr
+ if par:
+ raise Exception("Missing closing parenthesis somewhere in block: '%s'" % _selectors)
+ elif instr:
+ raise Exception("Missing closing string somewhere in block: '%s'" % _selectors)
+ else:
+ raise Exception("Block never closed: '%s'" % _selectors)
+ losestr = codestr[lose:]
+ for _property in losestr.split(';'):
+ _property = _property.strip()
+ lineno += _property.count('\n')
+ if _property:
+ yield lineno, _property, None
+
+
+################################################################################
+# Parser
+
+# NOTE: This class has no C equivalent
+class Parser(object):
+ def __init__(self, scanner):
+ self._scanner = scanner
+ self._pos = 0
+ self._char_pos = 0
+
+ def reset(self, input):
+ self._scanner.reset(input)
+ self._pos = 0
+ self._char_pos = 0
+
+ def _peek(self, types):
+ """
+ Returns the token type for lookahead; if there are any args
+ then the list of args is the set of token types to allow
+ """
+ try:
+ tok = self._scanner.token(self._pos, types)
+ return tok[2]
+ except SyntaxError:
+ return None
+
+ def _scan(self, type):
+ """
+ Returns the matched text, and moves to the next token
+ """
+ tok = self._scanner.token(self._pos, set([type]))
+ self._char_pos = tok[0]
+ if tok[2] != type:
+ raise SyntaxError("SyntaxError[@ char %s: %s]" % (repr(tok[0]), "Trying to find " + type))
+ self._pos += 1
+ return tok[3]
+
+
+try:
+ from ._scanner import NoMoreTokens
+except ImportError:
+ class NoMoreTokens(Exception):
+ """
+ Another exception object, for when we run out of tokens
+ """
+ pass
+
+
+try:
+ from ._scanner import Scanner
+except ImportError:
+ class Scanner(object):
+ def __init__(self, patterns, ignore, input=None):
+ """
+ Patterns is [(terminal,regex)...]
+ Ignore is [terminal,...];
+ Input is a string
+ """
+ self.reset(input)
+ self.ignore = ignore
+ # The stored patterns are a pair (compiled regex,source
+ # regex). If the patterns variable passed in to the
+ # constructor is None, we assume that the class already has a
+ # proper .patterns list constructed
+ if patterns is not None:
+ self.patterns = []
+ for k, r in patterns:
+ self.patterns.append((k, re.compile(r)))
+
+ def reset(self, input):
+ self.tokens = []
+ self.restrictions = []
+ self.input = input
+ self.pos = 0
+
+ def __repr__(self):
+ """
+ Print the last 10 tokens that have been scanned in
+ """
+ output = ''
+ for t in self.tokens[-10:]:
+ output = "%s\n (@%s) %s = %s" % (output, t[0], t[2], repr(t[3]))
+ return output
+
+ def _scan(self, restrict):
+ """
+ Should scan another token and add it to the list, self.tokens,
+ and add the restriction to self.restrictions
+ """
+ # Keep looking for a token, ignoring any in self.ignore
+ token = None
+ while True:
+ best_pat = None
+ # Search the patterns for a match, with earlier
+ # tokens in the list having preference
+ best_pat_len = 0
+ for tok, regex in self.patterns:
+ if DEBUG:
+ print("\tTrying %s: %s at pos %d -> %s" % (repr(tok), repr(regex.pattern), self.pos, repr(self.input)))
+ # First check to see if we're restricting to this token
+ if restrict and tok not in restrict and tok not in self.ignore:
+ if DEBUG:
+ print("\tSkipping %r!" % (tok,))
+ continue
+ m = regex.match(self.input, self.pos)
+ if m:
+ # We got a match
+ best_pat = tok
+ best_pat_len = len(m.group(0))
+ if DEBUG:
+ print("Match OK! %s: %s at pos %d" % (repr(tok), repr(regex.pattern), self.pos))
+ break
+
+ # If we didn't find anything, raise an error
+ if best_pat is None:
+ msg = "Bad token found"
+ if restrict:
+ msg = "Bad token found while trying to find one of the restricted tokens: %s" % (", ".join(repr(r) for r in restrict))
+ raise SyntaxError("SyntaxError[@ char %s: %s]" % (repr(self.pos), msg))
+
+ # If we found something that isn't to be ignored, return it
+ if best_pat in self.ignore:
+ # This token should be ignored...
+ self.pos += best_pat_len
+ else:
+ end_pos = self.pos + best_pat_len
+ # Create a token with this data
+ token = (
+ self.pos,
+ end_pos,
+ best_pat,
+ self.input[self.pos:end_pos]
+ )
+ break
+ if token is not None:
+ self.pos = token[1]
+ # Only add this token if it's not in the list
+ # (to prevent looping)
+ if not self.tokens or token != self.tokens[-1]:
+ self.tokens.append(token)
+ self.restrictions.append(restrict)
+ return 1
+ return 0
+
+ def token(self, i, restrict=None):
+ """
+ Get the i'th token, and if i is one past the end, then scan
+ for another token; restrict is a list of tokens that
+ are allowed, or 0 for any token.
+ """
+ tokens_len = len(self.tokens)
+ if i == tokens_len: # We are at the end, get the next...
+ tokens_len += self._scan(restrict)
+ if i < tokens_len:
+ if restrict and self.restrictions[i] and restrict > self.restrictions[i]:
+ raise NotImplementedError("Unimplemented: restriction set changed")
+ return self.tokens[i]
+ raise NoMoreTokens
+
+ def rewind(self, i):
+ tokens_len = len(self.tokens)
+ if i <= tokens_len:
+ token = self.tokens[i]
+ self.tokens = self.tokens[:i]
+ self.restrictions = self.restrictions[:i]
+ self.pos = token[0]