From 743dd8bca34035e701c2d769b9b5cc010c3840c2 Mon Sep 17 00:00:00 2001 From: "Eevee (Alex Munroe)" Date: Thu, 28 Aug 2014 18:19:44 -0700 Subject: Move ALL the parsing stuff under scss/grammar/. Also, in the same vein as Python 3's approach, simply importing from the "native" module will automatically produce the sped-up versions if available. Conflicts: scss/compiler.py --- scss/_grammar.py | 407 --------------- scss/_native.py | 265 ---------- scss/compiler.py | 7 +- scss/expression.py | 5 +- scss/grammar/__init__.py | 9 + scss/grammar/expression.g | 218 ++++++++ scss/grammar/expression.py | 405 +++++++++++++++ scss/grammar/scanner.py | 274 ++++++++++ scss/src/_speedups.c | 8 +- scss/src/grammar/LICENSE | 18 - scss/src/grammar/README | 6 - scss/src/grammar/grammar.g | 220 -------- scss/src/grammar/yapps2.py | 896 -------------------------------- scss/src/grammar/yappsrt.py | 275 ---------- setup.py | 2 +- yapps2.py | 1178 +++++++++++++++++++++++++++++++++++++++++++ 16 files changed, 2091 insertions(+), 2102 deletions(-) delete mode 100644 scss/_grammar.py delete mode 100644 scss/_native.py create mode 100644 scss/grammar/__init__.py create mode 100644 scss/grammar/expression.g create mode 100644 scss/grammar/expression.py create mode 100644 scss/grammar/scanner.py delete mode 100644 scss/src/grammar/LICENSE delete mode 100644 scss/src/grammar/README delete mode 100644 scss/src/grammar/grammar.g delete mode 100755 scss/src/grammar/yapps2.py delete mode 100644 scss/src/grammar/yappsrt.py create mode 100755 yapps2.py diff --git a/scss/_grammar.py b/scss/_grammar.py deleted file mode 100644 index 6296e0b..0000000 --- a/scss/_grammar.py +++ /dev/null @@ -1,407 +0,0 @@ -"""Grammar for parsing Sass expressions.""" -# This is a GENERATED FILE -- DO NOT EDIT DIRECTLY! -# Edit scss/src/grammar/grammar.g, then run: -# python2 scss/src/grammar/yapps2.py scss/src/grammar/grammar.g scss/_grammar.py - -import operator -import re - -from scss.ast import Parentheses -from scss.ast import UnaryOp -from scss.ast import BinaryOp -from scss.ast import AnyOp -from scss.ast import AllOp -from scss.ast import NotOp -from scss.ast import CallOp -from scss.ast import Variable -from scss.ast import Literal -from scss.ast import ListLiteral -from scss.ast import MapLiteral -from scss.ast import ArgspecLiteral -from scss.types import Color -from scss.types import Number -from scss.types import String -from scss.types import Url -from scss.util import dequote - -from scss._native import Parser -try: - from scss._speedups import Scanner -except ImportError: - from scss._native import Scanner - - - -class SassExpressionScanner(Scanner): - patterns = None - _patterns = [ - ('"\'"', "'"), - ('"\\""', '"'), - ('"url"', 'url'), - ('":"', ':'), - ('","', ','), - ('[ \r\t\n]+', '[ \r\t\n]+'), - ('LPAR', '\\(|\\['), - ('RPAR', '\\)|\\]'), - ('END', '$'), - ('MUL', '[*]'), - ('DIV', '/'), - ('ADD', '[+]'), - ('SUB', '-\\s'), - ('SIGN', '-(?![a-zA-Z_])'), - ('AND', '(?='), - ('LT', '<'), - ('GT', '>'), - ('DOTDOTDOT', '[.]{3}'), - ('KWSTR', "'[^'\\\\]*(?:\\\\.[^'\\\\]*)*'(?=\\s*:)"), - ('STR', "'[^'\\\\]*(?:\\\\.[^'\\\\]*)*'"), - ('KWQSTR', '"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"(?=\\s*:)'), - ('QSTR', '"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"'), - ('UNITS', '(? 
1 else v[0] - - def expr_slst(self): - or_expr = self.or_expr() - v = [or_expr] - while self._peek(self.expr_slst_rsts) not in self.argspec_items_rsts: - or_expr = self.or_expr() - v.append(or_expr) - return ListLiteral(v, comma=False) if len(v) > 1 else v[0] - - def or_expr(self): - and_expr = self.and_expr() - v = and_expr - while self._peek(self.or_expr_rsts) == 'OR': - OR = self._scan('OR') - and_expr = self.and_expr() - v = AnyOp(v, and_expr) - return v - - def and_expr(self): - not_expr = self.not_expr() - v = not_expr - while self._peek(self.and_expr_rsts) == 'AND': - AND = self._scan('AND') - not_expr = self.not_expr() - v = AllOp(v, not_expr) - return v - - def not_expr(self): - _token_ = self._peek(self.argspec_item_chks) - if _token_ != 'NOT': - comparison = self.comparison() - return comparison - else: # == 'NOT' - NOT = self._scan('NOT') - not_expr = self.not_expr() - return NotOp(not_expr) - - def comparison(self): - a_expr = self.a_expr() - v = a_expr - while self._peek(self.comparison_rsts) in self.comparison_chks: - _token_ = self._peek(self.comparison_chks) - if _token_ == 'LT': - LT = self._scan('LT') - a_expr = self.a_expr() - v = BinaryOp(operator.lt, v, a_expr) - elif _token_ == 'GT': - GT = self._scan('GT') - a_expr = self.a_expr() - v = BinaryOp(operator.gt, v, a_expr) - elif _token_ == 'LE': - LE = self._scan('LE') - a_expr = self.a_expr() - v = BinaryOp(operator.le, v, a_expr) - elif _token_ == 'GE': - GE = self._scan('GE') - a_expr = self.a_expr() - v = BinaryOp(operator.ge, v, a_expr) - elif _token_ == 'EQ': - EQ = self._scan('EQ') - a_expr = self.a_expr() - v = BinaryOp(operator.eq, v, a_expr) - else: # == 'NE' - NE = self._scan('NE') - a_expr = self.a_expr() - v = BinaryOp(operator.ne, v, a_expr) - return v - - def a_expr(self): - m_expr = self.m_expr() - v = m_expr - while self._peek(self.a_expr_rsts) in self.a_expr_chks: - _token_ = self._peek(self.a_expr_chks) - if _token_ == 'ADD': - ADD = self._scan('ADD') - m_expr = self.m_expr() - v = BinaryOp(operator.add, v, m_expr) - else: # == 'SUB' - SUB = self._scan('SUB') - m_expr = self.m_expr() - v = BinaryOp(operator.sub, v, m_expr) - return v - - def m_expr(self): - u_expr = self.u_expr() - v = u_expr - while self._peek(self.m_expr_rsts) in self.m_expr_chks: - _token_ = self._peek(self.m_expr_chks) - if _token_ == 'MUL': - MUL = self._scan('MUL') - u_expr = self.u_expr() - v = BinaryOp(operator.mul, v, u_expr) - else: # == 'DIV' - DIV = self._scan('DIV') - u_expr = self.u_expr() - v = BinaryOp(operator.truediv, v, u_expr) - return v - - def u_expr(self): - _token_ = self._peek(self.u_expr_rsts) - if _token_ == 'SIGN': - SIGN = self._scan('SIGN') - u_expr = self.u_expr() - return UnaryOp(operator.neg, u_expr) - elif _token_ == 'ADD': - ADD = self._scan('ADD') - u_expr = self.u_expr() - return UnaryOp(operator.pos, u_expr) - else: # in self.u_expr_chks - atom = self.atom() - return atom - - def atom(self): - _token_ = self._peek(self.u_expr_chks) - if _token_ == 'LPAR': - LPAR = self._scan('LPAR') - _token_ = self._peek(self.atom_rsts) - if _token_ == 'RPAR': - v = ListLiteral([], comma=False) - elif _token_ not in self.argspec_item_chks: - expr_map = self.expr_map() - v = expr_map - else: # in self.argspec_item_chks - expr_lst = self.expr_lst() - v = expr_lst - RPAR = self._scan('RPAR') - return Parentheses(v) - elif _token_ == '"url"': - self._scan('"url"') - LPAR = self._scan('LPAR') - _token_ = self._peek(self.atom_rsts_) - if _token_ == 'URL': - URL = self._scan('URL') - quotes = None - elif _token_ == 
'"\\""': - self._scan('"\\""') - URL = self._scan('URL') - self._scan('"\\""') - quotes = '"' - else: # == '"\'"' - self._scan('"\'"') - URL = self._scan('URL') - self._scan('"\'"') - quotes = "'" - RPAR = self._scan('RPAR') - return Literal(Url(URL, quotes=quotes)) - elif _token_ == 'FNCT': - FNCT = self._scan('FNCT') - LPAR = self._scan('LPAR') - argspec = self.argspec() - RPAR = self._scan('RPAR') - return CallOp(FNCT, argspec) - elif _token_ == 'BANG_IMPORTANT': - BANG_IMPORTANT = self._scan('BANG_IMPORTANT') - return Literal(String(BANG_IMPORTANT, quotes=None)) - elif _token_ == 'ID': - ID = self._scan('ID') - return Literal.from_bareword(ID) - elif _token_ == 'NUM': - NUM = self._scan('NUM') - UNITS = None - if self._peek(self.atom_rsts__) == 'UNITS': - UNITS = self._scan('UNITS') - return Literal(Number(float(NUM), unit=UNITS)) - elif _token_ == 'STR': - STR = self._scan('STR') - return Literal(String(dequote(STR), quotes="'")) - elif _token_ == 'QSTR': - QSTR = self._scan('QSTR') - return Literal(String(dequote(QSTR), quotes='"')) - elif _token_ == 'COLOR': - COLOR = self._scan('COLOR') - return Literal(Color.from_hex(COLOR, literal=True)) - else: # == 'VAR' - VAR = self._scan('VAR') - return Variable(VAR) - - def kwatom(self): - _token_ = self._peek(self.kwatom_rsts) - if _token_ == '":"': - pass - elif _token_ == 'KWID': - KWID = self._scan('KWID') - return Literal.from_bareword(KWID) - elif _token_ == 'KWNUM': - KWNUM = self._scan('KWNUM') - UNITS = None - if self._peek(self.kwatom_rsts_) == 'UNITS': - UNITS = self._scan('UNITS') - return Literal(Number(float(KWNUM), unit=UNITS)) - elif _token_ == 'KWSTR': - KWSTR = self._scan('KWSTR') - return Literal(String(dequote(KWSTR), quotes="'")) - elif _token_ == 'KWQSTR': - KWQSTR = self._scan('KWQSTR') - return Literal(String(dequote(KWQSTR), quotes='"')) - elif _token_ == 'KWCOLOR': - KWCOLOR = self._scan('KWCOLOR') - return Literal(Color.from_hex(KWCOLOR, literal=True)) - else: # == 'KWVAR' - KWVAR = self._scan('KWVAR') - return Variable(KWVAR) - - u_expr_chks = set(['"url"', 'LPAR', 'COLOR', 'QSTR', 'NUM', 'FNCT', 'STR', 'VAR', 'BANG_IMPORTANT', 'ID']) - m_expr_rsts = set(['LPAR', 'SUB', 'QSTR', 'RPAR', 'MUL', 'DIV', 'BANG_IMPORTANT', 'LE', 'COLOR', 'NE', 'LT', 'NUM', '"url"', 'GT', 'END', 'SIGN', 'GE', 'FNCT', 'STR', 'VAR', 'EQ', 'ID', 'AND', 'ADD', 'NOT', 'OR', '","']) - argspec_items_rsts = set(['RPAR', 'END', '","']) - expr_map_rsts = set(['RPAR', '","']) - argspec_items_rsts__ = set(['KWVAR', 'LPAR', 'QSTR', 'SLURPYVAR', 'COLOR', 'DOTDOTDOT', 'SIGN', 'VAR', 'ADD', 'NUM', '"url"', 'FNCT', 'STR', 'NOT', 'BANG_IMPORTANT', 'ID']) - kwatom_rsts = set(['KWVAR', 'KWID', 'KWSTR', 'KWQSTR', 'KWCOLOR', '":"', 'KWNUM']) - argspec_item_chks = set(['"url"', 'LPAR', 'COLOR', 'QSTR', 'SIGN', 'VAR', 'ADD', 'NUM', 'FNCT', 'STR', 'NOT', 'BANG_IMPORTANT', 'ID']) - a_expr_chks = set(['ADD', 'SUB']) - expr_slst_rsts = set(['"url"', 'LPAR', 'END', 'COLOR', 'QSTR', 'SIGN', 'VAR', 'ADD', 'NUM', 'RPAR', 'FNCT', 'STR', 'NOT', 'BANG_IMPORTANT', 'ID', '","']) - atom_rsts__ = set(['LPAR', 'SUB', 'QSTR', 'RPAR', 'VAR', 'MUL', 'DIV', 'BANG_IMPORTANT', 'LE', 'COLOR', 'NE', 'LT', 'NUM', '"url"', 'GT', 'END', 'SIGN', 'GE', 'FNCT', 'STR', 'UNITS', 'EQ', 'ID', 'AND', 'ADD', 'NOT', 'OR', '","']) - or_expr_rsts = set(['"url"', 'LPAR', 'END', 'COLOR', 'QSTR', 'SIGN', 'VAR', 'ADD', 'NUM', 'RPAR', 'FNCT', 'STR', 'NOT', 'ID', 'BANG_IMPORTANT', 'OR', '","']) - and_expr_rsts = set(['AND', 'LPAR', 'RPAR', 'END', 'COLOR', 'QSTR', 'SIGN', 'VAR', 'ADD', 'NUM', '"url"', 
'FNCT', 'STR', 'NOT', 'ID', 'BANG_IMPORTANT', 'OR', '","']) - comparison_rsts = set(['LPAR', 'QSTR', 'RPAR', 'BANG_IMPORTANT', 'LE', 'COLOR', 'NE', 'LT', 'NUM', '"url"', 'GT', 'END', 'SIGN', 'ADD', 'FNCT', 'STR', 'VAR', 'EQ', 'ID', 'AND', 'GE', 'NOT', 'OR', '","']) - argspec_chks = set(['DOTDOTDOT', 'SLURPYVAR']) - atom_rsts_ = set(['URL', '"\\""', '"\'"']) - expr_map_rsts_ = set(['KWVAR', 'KWID', 'KWSTR', 'KWQSTR', 'RPAR', 'KWCOLOR', '":"', 'KWNUM', '","']) - u_expr_rsts = set(['"url"', 'LPAR', 'COLOR', 'QSTR', 'SIGN', 'ADD', 'NUM', 'FNCT', 'STR', 'VAR', 'BANG_IMPORTANT', 'ID']) - comparison_chks = set(['GT', 'GE', 'NE', 'LT', 'LE', 'EQ']) - argspec_items_rsts_ = set(['KWVAR', 'LPAR', 'RPAR', 'QSTR', 'END', 'SLURPYVAR', 'COLOR', 'DOTDOTDOT', 'SIGN', 'VAR', 'ADD', 'NUM', '"url"', 'FNCT', 'STR', 'NOT', 'BANG_IMPORTANT', 'ID']) - a_expr_rsts = set(['LPAR', 'SUB', 'QSTR', 'RPAR', 'BANG_IMPORTANT', 'LE', 'COLOR', 'NE', 'LT', 'NUM', '"url"', 'GT', 'END', 'SIGN', 'GE', 'FNCT', 'STR', 'VAR', 'EQ', 'ID', 'AND', 'ADD', 'NOT', 'OR', '","']) - m_expr_chks = set(['MUL', 'DIV']) - kwatom_rsts_ = set(['UNITS', '":"']) - argspec_items_chks = set(['KWVAR', '"url"', 'LPAR', 'COLOR', 'QSTR', 'SIGN', 'VAR', 'ADD', 'NUM', 'FNCT', 'STR', 'NOT', 'BANG_IMPORTANT', 'ID']) - argspec_rsts = set(['KWVAR', 'LPAR', 'BANG_IMPORTANT', 'END', 'SLURPYVAR', 'COLOR', 'DOTDOTDOT', 'RPAR', 'VAR', 'ADD', 'NUM', '"url"', 'FNCT', 'STR', 'NOT', 'QSTR', 'SIGN', 'ID']) - atom_rsts = set(['KWVAR', 'KWID', 'KWSTR', 'BANG_IMPORTANT', 'LPAR', 'COLOR', 'KWQSTR', 'SIGN', 'RPAR', 'KWCOLOR', 'VAR', 'ADD', 'NUM', '"url"', '":"', 'STR', 'NOT', 'QSTR', 'KWNUM', 'ID', 'FNCT']) - argspec_chks_ = set(['END', 'RPAR']) - argspec_rsts_ = set(['KWVAR', 'LPAR', 'BANG_IMPORTANT', 'END', 'COLOR', 'QSTR', 'SIGN', 'VAR', 'ADD', 'NUM', '"url"', 'FNCT', 'STR', 'NOT', 'RPAR', 'ID']) - - diff --git a/scss/_native.py b/scss/_native.py deleted file mode 100644 index 2be3d75..0000000 --- a/scss/_native.py +++ /dev/null @@ -1,265 +0,0 @@ -"""Pure-Python scanner and parser, used if _speedups is not available.""" -from __future__ import absolute_import -from __future__ import print_function -from __future__ import unicode_literals - -from collections import deque - -import re - -DEBUG = False - -# TODO copied from __init__ -_blocks_re = re.compile(r'[{},;()\'"\n]') - - -def locate_blocks(codestr): - """ - For processing CSS like strings. - - Either returns all selectors (that can be "smart" multi-lined, as - long as it's joined by `,`, or enclosed in `(` and `)`) with its code block - (the one between `{` and `}`, which can be nested), or the "lose" code - (properties) that doesn't have any blocks. - """ - lineno = 1 - - par = 0 - instr = None - depth = 0 - skip = False - i = init = lose = 0 - start = end = None - lineno_stack = deque() - - for m in _blocks_re.finditer(codestr): - i = m.start(0) - c = codestr[i] - if c == '\n': - lineno += 1 - - if instr is not None: - if c == instr: - instr = None # A string ends (FIXME: needs to accept escaped characters) - elif c in ('"', "'"): - instr = c # A string starts - elif c == '(': # parenthesis begins: - par += 1 - elif c == ')': # parenthesis ends: - par -= 1 - elif not par and not instr: - if c == '{': # block begins: - if depth == 0: - if i > 0 and codestr[i - 1] == '#': # Do not process #{...} as blocks! 
- skip = True - else: - lineno_stack.append(lineno) - start = i - if lose < init: - _property = codestr[lose:init].strip() - if _property: - yield lineno, _property, None - lose = init - depth += 1 - elif c == '}': # block ends: - if depth <= 0: - raise SyntaxError("Unexpected closing brace on line {0}".format(lineno)) - else: - depth -= 1 - if depth == 0: - if not skip: - end = i - _selectors = codestr[init:start].strip() - _codestr = codestr[start + 1:end].strip() - if _selectors: - yield lineno_stack.pop(), _selectors, _codestr - init = lose = end + 1 - skip = False - elif depth == 0: - if c == ';': # End of property (or block): - init = i - if lose < init: - _property = codestr[lose:init].strip() - if _property: - yield lineno, _property, None - init = lose = i + 1 - if depth > 0: - if not skip: - _selectors = codestr[init:start].strip() - _codestr = codestr[start + 1:].strip() - if _selectors: - yield lineno, _selectors, _codestr - if par: - raise Exception("Missing closing parenthesis somewhere in block: '%s'" % _selectors) - elif instr: - raise Exception("Missing closing string somewhere in block: '%s'" % _selectors) - else: - raise Exception("Block never closed: '%s'" % _selectors) - losestr = codestr[lose:] - for _property in losestr.split(';'): - _property = _property.strip() - lineno += _property.count('\n') - if _property: - yield lineno, _property, None - - -################################################################################ -# Parser - -class Parser(object): - # NOTE: This class has no C equivalent - def __init__(self, scanner): - self._scanner = scanner - self._pos = 0 - self._char_pos = 0 - - def reset(self, input): - self._scanner.reset(input) - self._pos = 0 - self._char_pos = 0 - - def _peek(self, types): - """ - Returns the token type for lookahead; if there are any args - then the list of args is the set of token types to allow - """ - try: - tok = self._scanner.token(self._pos, types) - return tok[2] - except SyntaxError: - return None - - def _scan(self, type): - """ - Returns the matched text, and moves to the next token - """ - tok = self._scanner.token(self._pos, set([type])) - self._char_pos = tok[0] - if tok[2] != type: - raise SyntaxError("SyntaxError[@ char %s: %s]" % (repr(tok[0]), "Trying to find " + type)) - self._pos += 1 - return tok[3] - - -class NoMoreTokens(Exception): - """ - Another exception object, for when we run out of tokens - """ - pass - - -class Scanner(object): - def __init__(self, patterns, ignore, input=None): - """ - Patterns is [(terminal,regex)...] - Ignore is [terminal,...]; - Input is a string - """ - self.reset(input) - self.ignore = ignore - # The stored patterns are a pair (compiled regex,source - # regex). 
If the patterns variable passed in to the - # constructor is None, we assume that the class already has a - # proper .patterns list constructed - if patterns is not None: - self.patterns = [] - for k, r in patterns: - self.patterns.append((k, re.compile(r))) - - def reset(self, input): - self.tokens = [] - self.restrictions = [] - self.input = input - self.pos = 0 - - def __repr__(self): - """ - Print the last 10 tokens that have been scanned in - """ - output = '' - for t in self.tokens[-10:]: - output = "%s\n (@%s) %s = %s" % (output, t[0], t[2], repr(t[3])) - return output - - def _scan(self, restrict): - """ - Should scan another token and add it to the list, self.tokens, - and add the restriction to self.restrictions - """ - # Keep looking for a token, ignoring any in self.ignore - token = None - while True: - best_pat = None - # Search the patterns for a match, with earlier - # tokens in the list having preference - best_pat_len = 0 - for tok, regex in self.patterns: - if DEBUG: - print("\tTrying %s: %s at pos %d -> %s" % (repr(tok), repr(regex.pattern), self.pos, repr(self.input))) - # First check to see if we're restricting to this token - if restrict and tok not in restrict and tok not in self.ignore: - if DEBUG: - print("\tSkipping %r!" % (tok,)) - continue - m = regex.match(self.input, self.pos) - if m: - # We got a match - best_pat = tok - best_pat_len = len(m.group(0)) - if DEBUG: - print("Match OK! %s: %s at pos %d" % (repr(tok), repr(regex.pattern), self.pos)) - break - - # If we didn't find anything, raise an error - if best_pat is None: - msg = "Bad token found" - if restrict: - msg = "Bad token found while trying to find one of the restricted tokens: %s" % (", ".join(repr(r) for r in restrict)) - raise SyntaxError("SyntaxError[@ char %s: %s]" % (repr(self.pos), msg)) - - # If we found something that isn't to be ignored, return it - if best_pat in self.ignore: - # This token should be ignored... - self.pos += best_pat_len - else: - end_pos = self.pos + best_pat_len - # Create a token with this data - token = ( - self.pos, - end_pos, - best_pat, - self.input[self.pos:end_pos] - ) - break - if token is not None: - self.pos = token[1] - # Only add this token if it's not in the list - # (to prevent looping) - if not self.tokens or token != self.tokens[-1]: - self.tokens.append(token) - self.restrictions.append(restrict) - return 1 - return 0 - - def token(self, i, restrict=None): - """ - Get the i'th token, and if i is one past the end, then scan - for another token; restrict is a list of tokens that - are allowed, or 0 for any token. - """ - tokens_len = len(self.tokens) - if i == tokens_len: # We are at the end, get the next... 
- tokens_len += self._scan(restrict) - if i < tokens_len: - if restrict and self.restrictions[i] and restrict > self.restrictions[i]: - raise NotImplementedError("Unimplemented: restriction set changed") - return self.tokens[i] - raise NoMoreTokens - - def rewind(self, i): - tokens_len = len(self.tokens) - if i <= tokens_len: - token = self.tokens[i] - self.tokens = self.tokens[:i] - self.restrictions = self.restrictions[:i] - self.pos = token[0] diff --git a/scss/compiler.py b/scss/compiler.py index dceacef..fd40b74 100644 --- a/scss/compiler.py +++ b/scss/compiler.py @@ -27,6 +27,7 @@ from scss.extension import Extension from scss.extension.core import CoreExtension from scss.extension import NamespaceAdapterExtension from scss.extension.compass.sprites import sprite_map +from scss.grammar import locate_blocks from scss.rule import BlockAtRuleHeader from scss.rule import Namespace from scss.rule import RuleAncestry @@ -45,12 +46,6 @@ from scss.types import Url from scss.util import dequote from scss.util import normalize_var # TODO put in... namespace maybe? -try: - # Use C version if available - from scss._speedups import locate_blocks -except ImportError: - from scss._native import locate_blocks - # TODO should mention logging for the programmatic interface in the # documentation diff --git a/scss/expression.py b/scss/expression.py index a249f50..54001f6 100644 --- a/scss/expression.py +++ b/scss/expression.py @@ -4,20 +4,17 @@ from __future__ import unicode_literals import sys import logging -import operator -import re from warnings import warn import six from scss.cssdefs import _expr_glob_re, _interpolate_re from scss.errors import SassError, SassEvaluationError, SassParseError +from scss.grammar.expression import SassExpression, SassExpressionScanner from scss.rule import Namespace from scss.types import String from scss.util import dequote -from scss._grammar import SassExpression, SassExpressionScanner - log = logging.getLogger(__name__) diff --git a/scss/grammar/__init__.py b/scss/grammar/__init__.py new file mode 100644 index 0000000..ffd5ca8 --- /dev/null +++ b/scss/grammar/__init__.py @@ -0,0 +1,9 @@ +"""Grammar and parser plumbing for Sass. Much of this is generated or compiled +in some fashion. +""" +from .scanner import NoMoreTokens +from .scanner import Parser +from .scanner import Scanner +from .scanner import locate_blocks + +__all__ = ('NoMoreTokens', 'Parser', 'Scanner', 'locate_blocks') diff --git a/scss/grammar/expression.g b/scss/grammar/expression.g new file mode 100644 index 0000000..9cc833d --- /dev/null +++ b/scss/grammar/expression.g @@ -0,0 +1,218 @@ +"""Grammar for parsing Sass expressions.""" +# This is a GENERATED FILE -- DO NOT EDIT DIRECTLY! 
+# Edit scss/grammar/expression.g, then run: +# +# python2 yapps2.py scss/grammar/expression.g + +import operator +import re + +from scss.ast import Parentheses +from scss.ast import UnaryOp +from scss.ast import BinaryOp +from scss.ast import AnyOp +from scss.ast import AllOp +from scss.ast import NotOp +from scss.ast import CallOp +from scss.ast import Variable +from scss.ast import Literal +from scss.ast import ListLiteral +from scss.ast import MapLiteral +from scss.ast import ArgspecLiteral +from scss.types import Color +from scss.types import Number +from scss.types import String +from scss.types import Url +from scss.util import dequote + +from scss.grammar import Parser +from scss.grammar import Scanner + + +%% +parser SassExpression: + ignore: "[ \r\t\n]+" + token LPAR: "\\(|\\[" + token RPAR: "\\)|\\]" + token END: "$" + token MUL: "[*]" + token DIV: "/" + token ADD: "[+]" + token SUB: "-\s" + token SIGN: "-(?![a-zA-Z_])" + token AND: "(?=" + token LT: "<" + token GT: ">" + token DOTDOTDOT: '[.]{3}' + token KWSTR: "'[^'\\\\]*(?:\\\\.[^'\\\\]*)*'(?=\s*:)" + token STR: "'[^'\\\\]*(?:\\\\.[^'\\\\]*)*'" + token KWQSTR: '"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"(?=\s*:)' + token QSTR: '"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"' + token UNITS: "(? 1 else v[0] }} + + + # Expressions: + rule expr_slst: + or_expr {{ v = [or_expr] }} + ( + or_expr {{ v.append(or_expr) }} + )* {{ return ListLiteral(v, comma=False) if len(v) > 1 else v[0] }} + + rule or_expr: + and_expr {{ v = and_expr }} + ( + OR and_expr {{ v = AnyOp(v, and_expr) }} + )* {{ return v }} + + rule and_expr: + not_expr {{ v = not_expr }} + ( + AND not_expr {{ v = AllOp(v, not_expr) }} + )* {{ return v }} + + rule not_expr: + comparison {{ return comparison }} + | NOT not_expr {{ return NotOp(not_expr) }} + + rule comparison: + a_expr {{ v = a_expr }} + ( + LT a_expr {{ v = BinaryOp(operator.lt, v, a_expr) }} + | GT a_expr {{ v = BinaryOp(operator.gt, v, a_expr) }} + | LE a_expr {{ v = BinaryOp(operator.le, v, a_expr) }} + | GE a_expr {{ v = BinaryOp(operator.ge, v, a_expr) }} + | EQ a_expr {{ v = BinaryOp(operator.eq, v, a_expr) }} + | NE a_expr {{ v = BinaryOp(operator.ne, v, a_expr) }} + )* {{ return v }} + + rule a_expr: + m_expr {{ v = m_expr }} + ( + ADD m_expr {{ v = BinaryOp(operator.add, v, m_expr) }} + | SUB m_expr {{ v = BinaryOp(operator.sub, v, m_expr) }} + )* {{ return v }} + + rule m_expr: + u_expr {{ v = u_expr }} + ( + MUL u_expr {{ v = BinaryOp(operator.mul, v, u_expr) }} + | DIV u_expr {{ v = BinaryOp(operator.truediv, v, u_expr) }} + )* {{ return v }} + + rule u_expr: + SIGN u_expr {{ return UnaryOp(operator.neg, u_expr) }} + | ADD u_expr {{ return UnaryOp(operator.pos, u_expr) }} + | atom {{ return atom }} + + rule atom: + LPAR ( + {{ v = ListLiteral([], comma=False) }} + | expr_map {{ v = expr_map }} + | expr_lst {{ v = expr_lst }} + ) RPAR {{ return Parentheses(v) }} + # Special functions. Note that these technically overlap with the + # regular function rule, which makes this not quite LL -- but they're + # different tokens so yapps can't tell, and it resolves the conflict by + # picking the first one. 
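+        # For example, `url(...)` also matches the FNCT pattern (any
+        # identifier followed by `(`), but the literal "url" alternative
+        # below is listed first, so it wins.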
+ | "url" LPAR + ( + URL {{ quotes = None }} + | "\"" URL "\"" {{ quotes = '"' }} + | "'" URL "'" {{ quotes = "'" }} + ) + RPAR {{ return Literal(Url(URL, quotes=quotes)) }} + | FNCT LPAR argspec RPAR {{ return CallOp(FNCT, argspec) }} + | BANG_IMPORTANT {{ return Literal(String(BANG_IMPORTANT, quotes=None)) }} + | ID {{ return Literal.from_bareword(ID) }} + | NUM {{ UNITS = None }} + [ UNITS ] {{ return Literal(Number(float(NUM), unit=UNITS)) }} + | STR {{ return Literal(String(dequote(STR), quotes="'")) }} + | QSTR {{ return Literal(String(dequote(QSTR), quotes='"')) }} + | COLOR {{ return Literal(Color.from_hex(COLOR, literal=True)) }} + | VAR {{ return Variable(VAR) }} + + rule kwatom: + # nothing + | KWID {{ return Literal.from_bareword(KWID) }} + | KWNUM {{ UNITS = None }} + [ UNITS ] {{ return Literal(Number(float(KWNUM), unit=UNITS)) }} + | KWSTR {{ return Literal(String(dequote(KWSTR), quotes="'")) }} + | KWQSTR {{ return Literal(String(dequote(KWQSTR), quotes='"')) }} + | KWCOLOR {{ return Literal(Color.from_hex(KWCOLOR, literal=True)) }} + | KWVAR {{ return Variable(KWVAR) }} + +%% diff --git a/scss/grammar/expression.py b/scss/grammar/expression.py new file mode 100644 index 0000000..aed08b9 --- /dev/null +++ b/scss/grammar/expression.py @@ -0,0 +1,405 @@ +"""Grammar for parsing Sass expressions.""" +# This is a GENERATED FILE -- DO NOT EDIT DIRECTLY! +# Edit scss/grammar/expression.g, then run: +# +# python2 yapps2.py scss/grammar/expression.g + +import operator +import re + +from scss.ast import Parentheses +from scss.ast import UnaryOp +from scss.ast import BinaryOp +from scss.ast import AnyOp +from scss.ast import AllOp +from scss.ast import NotOp +from scss.ast import CallOp +from scss.ast import Variable +from scss.ast import Literal +from scss.ast import ListLiteral +from scss.ast import MapLiteral +from scss.ast import ArgspecLiteral +from scss.types import Color +from scss.types import Number +from scss.types import String +from scss.types import Url +from scss.util import dequote + +from scss.grammar import Parser +from scss.grammar import Scanner + + + +class SassExpressionScanner(Scanner): + patterns = None + _patterns = [ + ('"\'"', "'"), + ('"\\""', '"'), + ('"url"', 'url'), + ('":"', ':'), + ('","', ','), + ('[ \r\t\n]+', '[ \r\t\n]+'), + ('LPAR', '\\(|\\['), + ('RPAR', '\\)|\\]'), + ('END', '$'), + ('MUL', '[*]'), + ('DIV', '/'), + ('ADD', '[+]'), + ('SUB', '-\\s'), + ('SIGN', '-(?![a-zA-Z_])'), + ('AND', '(?='), + ('LT', '<'), + ('GT', '>'), + ('DOTDOTDOT', '[.]{3}'), + ('KWSTR', "'[^'\\\\]*(?:\\\\.[^'\\\\]*)*'(?=\\s*:)"), + ('STR', "'[^'\\\\]*(?:\\\\.[^'\\\\]*)*'"), + ('KWQSTR', '"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"(?=\\s*:)'), + ('QSTR', '"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"'), + ('UNITS', '(? 
1 else v[0] + + def expr_slst(self): + or_expr = self.or_expr() + v = [or_expr] + while self._peek(self.expr_slst_rsts) not in self.argspec_items_rsts: + or_expr = self.or_expr() + v.append(or_expr) + return ListLiteral(v, comma=False) if len(v) > 1 else v[0] + + def or_expr(self): + and_expr = self.and_expr() + v = and_expr + while self._peek(self.or_expr_rsts) == 'OR': + OR = self._scan('OR') + and_expr = self.and_expr() + v = AnyOp(v, and_expr) + return v + + def and_expr(self): + not_expr = self.not_expr() + v = not_expr + while self._peek(self.and_expr_rsts) == 'AND': + AND = self._scan('AND') + not_expr = self.not_expr() + v = AllOp(v, not_expr) + return v + + def not_expr(self): + _token_ = self._peek(self.argspec_item_chks) + if _token_ != 'NOT': + comparison = self.comparison() + return comparison + else: # == 'NOT' + NOT = self._scan('NOT') + not_expr = self.not_expr() + return NotOp(not_expr) + + def comparison(self): + a_expr = self.a_expr() + v = a_expr + while self._peek(self.comparison_rsts) in self.comparison_chks: + _token_ = self._peek(self.comparison_chks) + if _token_ == 'LT': + LT = self._scan('LT') + a_expr = self.a_expr() + v = BinaryOp(operator.lt, v, a_expr) + elif _token_ == 'GT': + GT = self._scan('GT') + a_expr = self.a_expr() + v = BinaryOp(operator.gt, v, a_expr) + elif _token_ == 'LE': + LE = self._scan('LE') + a_expr = self.a_expr() + v = BinaryOp(operator.le, v, a_expr) + elif _token_ == 'GE': + GE = self._scan('GE') + a_expr = self.a_expr() + v = BinaryOp(operator.ge, v, a_expr) + elif _token_ == 'EQ': + EQ = self._scan('EQ') + a_expr = self.a_expr() + v = BinaryOp(operator.eq, v, a_expr) + else: # == 'NE' + NE = self._scan('NE') + a_expr = self.a_expr() + v = BinaryOp(operator.ne, v, a_expr) + return v + + def a_expr(self): + m_expr = self.m_expr() + v = m_expr + while self._peek(self.a_expr_rsts) in self.a_expr_chks: + _token_ = self._peek(self.a_expr_chks) + if _token_ == 'ADD': + ADD = self._scan('ADD') + m_expr = self.m_expr() + v = BinaryOp(operator.add, v, m_expr) + else: # == 'SUB' + SUB = self._scan('SUB') + m_expr = self.m_expr() + v = BinaryOp(operator.sub, v, m_expr) + return v + + def m_expr(self): + u_expr = self.u_expr() + v = u_expr + while self._peek(self.m_expr_rsts) in self.m_expr_chks: + _token_ = self._peek(self.m_expr_chks) + if _token_ == 'MUL': + MUL = self._scan('MUL') + u_expr = self.u_expr() + v = BinaryOp(operator.mul, v, u_expr) + else: # == 'DIV' + DIV = self._scan('DIV') + u_expr = self.u_expr() + v = BinaryOp(operator.truediv, v, u_expr) + return v + + def u_expr(self): + _token_ = self._peek(self.u_expr_rsts) + if _token_ == 'SIGN': + SIGN = self._scan('SIGN') + u_expr = self.u_expr() + return UnaryOp(operator.neg, u_expr) + elif _token_ == 'ADD': + ADD = self._scan('ADD') + u_expr = self.u_expr() + return UnaryOp(operator.pos, u_expr) + else: # in self.u_expr_chks + atom = self.atom() + return atom + + def atom(self): + _token_ = self._peek(self.u_expr_chks) + if _token_ == 'LPAR': + LPAR = self._scan('LPAR') + _token_ = self._peek(self.atom_rsts) + if _token_ == 'RPAR': + v = ListLiteral([], comma=False) + elif _token_ not in self.argspec_item_chks: + expr_map = self.expr_map() + v = expr_map + else: # in self.argspec_item_chks + expr_lst = self.expr_lst() + v = expr_lst + RPAR = self._scan('RPAR') + return Parentheses(v) + elif _token_ == '"url"': + self._scan('"url"') + LPAR = self._scan('LPAR') + _token_ = self._peek(self.atom_rsts_) + if _token_ == 'URL': + URL = self._scan('URL') + quotes = None + elif _token_ == 
'"\\""': + self._scan('"\\""') + URL = self._scan('URL') + self._scan('"\\""') + quotes = '"' + else: # == '"\'"' + self._scan('"\'"') + URL = self._scan('URL') + self._scan('"\'"') + quotes = "'" + RPAR = self._scan('RPAR') + return Literal(Url(URL, quotes=quotes)) + elif _token_ == 'FNCT': + FNCT = self._scan('FNCT') + LPAR = self._scan('LPAR') + argspec = self.argspec() + RPAR = self._scan('RPAR') + return CallOp(FNCT, argspec) + elif _token_ == 'BANG_IMPORTANT': + BANG_IMPORTANT = self._scan('BANG_IMPORTANT') + return Literal(String(BANG_IMPORTANT, quotes=None)) + elif _token_ == 'ID': + ID = self._scan('ID') + return Literal.from_bareword(ID) + elif _token_ == 'NUM': + NUM = self._scan('NUM') + UNITS = None + if self._peek(self.atom_rsts__) == 'UNITS': + UNITS = self._scan('UNITS') + return Literal(Number(float(NUM), unit=UNITS)) + elif _token_ == 'STR': + STR = self._scan('STR') + return Literal(String(dequote(STR), quotes="'")) + elif _token_ == 'QSTR': + QSTR = self._scan('QSTR') + return Literal(String(dequote(QSTR), quotes='"')) + elif _token_ == 'COLOR': + COLOR = self._scan('COLOR') + return Literal(Color.from_hex(COLOR, literal=True)) + else: # == 'VAR' + VAR = self._scan('VAR') + return Variable(VAR) + + def kwatom(self): + _token_ = self._peek(self.kwatom_rsts) + if _token_ == '":"': + pass + elif _token_ == 'KWID': + KWID = self._scan('KWID') + return Literal.from_bareword(KWID) + elif _token_ == 'KWNUM': + KWNUM = self._scan('KWNUM') + UNITS = None + if self._peek(self.kwatom_rsts_) == 'UNITS': + UNITS = self._scan('UNITS') + return Literal(Number(float(KWNUM), unit=UNITS)) + elif _token_ == 'KWSTR': + KWSTR = self._scan('KWSTR') + return Literal(String(dequote(KWSTR), quotes="'")) + elif _token_ == 'KWQSTR': + KWQSTR = self._scan('KWQSTR') + return Literal(String(dequote(KWQSTR), quotes='"')) + elif _token_ == 'KWCOLOR': + KWCOLOR = self._scan('KWCOLOR') + return Literal(Color.from_hex(KWCOLOR, literal=True)) + else: # == 'KWVAR' + KWVAR = self._scan('KWVAR') + return Variable(KWVAR) + + u_expr_chks = set(['"url"', 'LPAR', 'COLOR', 'QSTR', 'NUM', 'FNCT', 'STR', 'VAR', 'BANG_IMPORTANT', 'ID']) + m_expr_rsts = set(['LPAR', 'SUB', 'QSTR', 'RPAR', 'MUL', 'DIV', 'BANG_IMPORTANT', 'LE', 'COLOR', 'NE', 'LT', 'NUM', '"url"', 'GT', 'END', 'SIGN', 'GE', 'FNCT', 'STR', 'VAR', 'EQ', 'ID', 'AND', 'ADD', 'NOT', 'OR', '","']) + argspec_items_rsts = set(['RPAR', 'END', '","']) + expr_map_rsts = set(['RPAR', '","']) + argspec_items_rsts__ = set(['KWVAR', 'LPAR', 'QSTR', 'SLURPYVAR', 'COLOR', 'DOTDOTDOT', 'SIGN', 'VAR', 'ADD', 'NUM', '"url"', 'FNCT', 'STR', 'NOT', 'BANG_IMPORTANT', 'ID']) + kwatom_rsts = set(['KWVAR', 'KWID', 'KWSTR', 'KWQSTR', 'KWCOLOR', '":"', 'KWNUM']) + argspec_item_chks = set(['"url"', 'LPAR', 'COLOR', 'QSTR', 'SIGN', 'VAR', 'ADD', 'NUM', 'FNCT', 'STR', 'NOT', 'BANG_IMPORTANT', 'ID']) + a_expr_chks = set(['ADD', 'SUB']) + expr_slst_rsts = set(['"url"', 'LPAR', 'END', 'COLOR', 'QSTR', 'SIGN', 'VAR', 'ADD', 'NUM', 'RPAR', 'FNCT', 'STR', 'NOT', 'BANG_IMPORTANT', 'ID', '","']) + atom_rsts__ = set(['LPAR', 'SUB', 'QSTR', 'RPAR', 'VAR', 'MUL', 'DIV', 'BANG_IMPORTANT', 'LE', 'COLOR', 'NE', 'LT', 'NUM', '"url"', 'GT', 'END', 'SIGN', 'GE', 'FNCT', 'STR', 'UNITS', 'EQ', 'ID', 'AND', 'ADD', 'NOT', 'OR', '","']) + or_expr_rsts = set(['"url"', 'LPAR', 'END', 'COLOR', 'QSTR', 'SIGN', 'VAR', 'ADD', 'NUM', 'RPAR', 'FNCT', 'STR', 'NOT', 'ID', 'BANG_IMPORTANT', 'OR', '","']) + and_expr_rsts = set(['AND', 'LPAR', 'RPAR', 'END', 'COLOR', 'QSTR', 'SIGN', 'VAR', 'ADD', 'NUM', '"url"', 
'FNCT', 'STR', 'NOT', 'ID', 'BANG_IMPORTANT', 'OR', '","']) + comparison_rsts = set(['LPAR', 'QSTR', 'RPAR', 'BANG_IMPORTANT', 'LE', 'COLOR', 'NE', 'LT', 'NUM', '"url"', 'GT', 'END', 'SIGN', 'ADD', 'FNCT', 'STR', 'VAR', 'EQ', 'ID', 'AND', 'GE', 'NOT', 'OR', '","']) + argspec_chks = set(['DOTDOTDOT', 'SLURPYVAR']) + atom_rsts_ = set(['URL', '"\\""', '"\'"']) + expr_map_rsts_ = set(['KWVAR', 'KWID', 'KWSTR', 'KWQSTR', 'RPAR', 'KWCOLOR', '":"', 'KWNUM', '","']) + u_expr_rsts = set(['"url"', 'LPAR', 'COLOR', 'QSTR', 'SIGN', 'ADD', 'NUM', 'FNCT', 'STR', 'VAR', 'BANG_IMPORTANT', 'ID']) + comparison_chks = set(['GT', 'GE', 'NE', 'LT', 'LE', 'EQ']) + argspec_items_rsts_ = set(['KWVAR', 'LPAR', 'RPAR', 'QSTR', 'END', 'SLURPYVAR', 'COLOR', 'DOTDOTDOT', 'SIGN', 'VAR', 'ADD', 'NUM', '"url"', 'FNCT', 'STR', 'NOT', 'BANG_IMPORTANT', 'ID']) + a_expr_rsts = set(['LPAR', 'SUB', 'QSTR', 'RPAR', 'BANG_IMPORTANT', 'LE', 'COLOR', 'NE', 'LT', 'NUM', '"url"', 'GT', 'END', 'SIGN', 'GE', 'FNCT', 'STR', 'VAR', 'EQ', 'ID', 'AND', 'ADD', 'NOT', 'OR', '","']) + m_expr_chks = set(['MUL', 'DIV']) + kwatom_rsts_ = set(['UNITS', '":"']) + argspec_items_chks = set(['KWVAR', '"url"', 'LPAR', 'COLOR', 'QSTR', 'SIGN', 'VAR', 'ADD', 'NUM', 'FNCT', 'STR', 'NOT', 'BANG_IMPORTANT', 'ID']) + argspec_rsts = set(['KWVAR', 'LPAR', 'BANG_IMPORTANT', 'END', 'SLURPYVAR', 'COLOR', 'DOTDOTDOT', 'RPAR', 'VAR', 'ADD', 'NUM', '"url"', 'FNCT', 'STR', 'NOT', 'QSTR', 'SIGN', 'ID']) + atom_rsts = set(['KWVAR', 'KWID', 'KWSTR', 'BANG_IMPORTANT', 'LPAR', 'COLOR', 'KWQSTR', 'SIGN', 'RPAR', 'KWCOLOR', 'VAR', 'ADD', 'NUM', '"url"', '":"', 'STR', 'NOT', 'QSTR', 'KWNUM', 'ID', 'FNCT']) + argspec_chks_ = set(['END', 'RPAR']) + argspec_rsts_ = set(['KWVAR', 'LPAR', 'BANG_IMPORTANT', 'END', 'COLOR', 'QSTR', 'SIGN', 'VAR', 'ADD', 'NUM', '"url"', 'FNCT', 'STR', 'NOT', 'RPAR', 'ID']) + + diff --git a/scss/grammar/scanner.py b/scss/grammar/scanner.py new file mode 100644 index 0000000..d466254 --- /dev/null +++ b/scss/grammar/scanner.py @@ -0,0 +1,274 @@ +"""Pure-Python scanner and parser, used if the C module is not available.""" +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from collections import deque + +import re + +DEBUG = False + +# TODO copied from __init__ +_blocks_re = re.compile(r'[{},;()\'"\n]') + + +try: + from ._scanner import locate_blocks +except ImportError: + def locate_blocks(codestr): + """ + For processing CSS like strings. + + Either returns all selectors (that can be "smart" multi-lined, as + long as it's joined by `,`, or enclosed in `(` and `)`) with its code block + (the one between `{` and `}`, which can be nested), or the "lose" code + (properties) that doesn't have any blocks. + """ + lineno = 1 + + par = 0 + instr = None + depth = 0 + skip = False + i = init = lose = 0 + start = end = None + lineno_stack = deque() + + for m in _blocks_re.finditer(codestr): + i = m.start(0) + c = codestr[i] + if c == '\n': + lineno += 1 + + if instr is not None: + if c == instr: + instr = None # A string ends (FIXME: needs to accept escaped characters) + elif c in ('"', "'"): + instr = c # A string starts + elif c == '(': # parenthesis begins: + par += 1 + elif c == ')': # parenthesis ends: + par -= 1 + elif not par and not instr: + if c == '{': # block begins: + if depth == 0: + if i > 0 and codestr[i - 1] == '#': # Do not process #{...} as blocks! 
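+                            # A `#{` at the top level is Sass interpolation
+                            # (e.g. `@media #{$query} { ... }`), so its `}`
+                            # must not be mistaken for the end of a block.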
+ skip = True + else: + lineno_stack.append(lineno) + start = i + if lose < init: + _property = codestr[lose:init].strip() + if _property: + yield lineno, _property, None + lose = init + depth += 1 + elif c == '}': # block ends: + if depth <= 0: + raise SyntaxError("Unexpected closing brace on line {0}".format(lineno)) + else: + depth -= 1 + if depth == 0: + if not skip: + end = i + _selectors = codestr[init:start].strip() + _codestr = codestr[start + 1:end].strip() + if _selectors: + yield lineno_stack.pop(), _selectors, _codestr + init = lose = end + 1 + skip = False + elif depth == 0: + if c == ';': # End of property (or block): + init = i + if lose < init: + _property = codestr[lose:init].strip() + if _property: + yield lineno, _property, None + init = lose = i + 1 + if depth > 0: + if not skip: + _selectors = codestr[init:start].strip() + _codestr = codestr[start + 1:].strip() + if _selectors: + yield lineno, _selectors, _codestr + if par: + raise Exception("Missing closing parenthesis somewhere in block: '%s'" % _selectors) + elif instr: + raise Exception("Missing closing string somewhere in block: '%s'" % _selectors) + else: + raise Exception("Block never closed: '%s'" % _selectors) + losestr = codestr[lose:] + for _property in losestr.split(';'): + _property = _property.strip() + lineno += _property.count('\n') + if _property: + yield lineno, _property, None + + +################################################################################ +# Parser + +# NOTE: This class has no C equivalent +class Parser(object): + def __init__(self, scanner): + self._scanner = scanner + self._pos = 0 + self._char_pos = 0 + + def reset(self, input): + self._scanner.reset(input) + self._pos = 0 + self._char_pos = 0 + + def _peek(self, types): + """ + Returns the token type for lookahead; if there are any args + then the list of args is the set of token types to allow + """ + try: + tok = self._scanner.token(self._pos, types) + return tok[2] + except SyntaxError: + return None + + def _scan(self, type): + """ + Returns the matched text, and moves to the next token + """ + tok = self._scanner.token(self._pos, set([type])) + self._char_pos = tok[0] + if tok[2] != type: + raise SyntaxError("SyntaxError[@ char %s: %s]" % (repr(tok[0]), "Trying to find " + type)) + self._pos += 1 + return tok[3] + + +try: + from ._scanner import NoMoreTokens +except ImportError: + class NoMoreTokens(Exception): + """ + Another exception object, for when we run out of tokens + """ + pass + + +try: + from ._scanner import Scanner +except ImportError: + class Scanner(object): + def __init__(self, patterns, ignore, input=None): + """ + Patterns is [(terminal,regex)...] + Ignore is [terminal,...]; + Input is a string + """ + self.reset(input) + self.ignore = ignore + # The stored patterns are a pair (compiled regex,source + # regex). 
If the patterns variable passed in to the + # constructor is None, we assume that the class already has a + # proper .patterns list constructed + if patterns is not None: + self.patterns = [] + for k, r in patterns: + self.patterns.append((k, re.compile(r))) + + def reset(self, input): + self.tokens = [] + self.restrictions = [] + self.input = input + self.pos = 0 + + def __repr__(self): + """ + Print the last 10 tokens that have been scanned in + """ + output = '' + for t in self.tokens[-10:]: + output = "%s\n (@%s) %s = %s" % (output, t[0], t[2], repr(t[3])) + return output + + def _scan(self, restrict): + """ + Should scan another token and add it to the list, self.tokens, + and add the restriction to self.restrictions + """ + # Keep looking for a token, ignoring any in self.ignore + token = None + while True: + best_pat = None + # Search the patterns for a match, with earlier + # tokens in the list having preference + best_pat_len = 0 + for tok, regex in self.patterns: + if DEBUG: + print("\tTrying %s: %s at pos %d -> %s" % (repr(tok), repr(regex.pattern), self.pos, repr(self.input))) + # First check to see if we're restricting to this token + if restrict and tok not in restrict and tok not in self.ignore: + if DEBUG: + print("\tSkipping %r!" % (tok,)) + continue + m = regex.match(self.input, self.pos) + if m: + # We got a match + best_pat = tok + best_pat_len = len(m.group(0)) + if DEBUG: + print("Match OK! %s: %s at pos %d" % (repr(tok), repr(regex.pattern), self.pos)) + break + + # If we didn't find anything, raise an error + if best_pat is None: + msg = "Bad token found" + if restrict: + msg = "Bad token found while trying to find one of the restricted tokens: %s" % (", ".join(repr(r) for r in restrict)) + raise SyntaxError("SyntaxError[@ char %s: %s]" % (repr(self.pos), msg)) + + # If we found something that isn't to be ignored, return it + if best_pat in self.ignore: + # This token should be ignored... + self.pos += best_pat_len + else: + end_pos = self.pos + best_pat_len + # Create a token with this data + token = ( + self.pos, + end_pos, + best_pat, + self.input[self.pos:end_pos] + ) + break + if token is not None: + self.pos = token[1] + # Only add this token if it's not in the list + # (to prevent looping) + if not self.tokens or token != self.tokens[-1]: + self.tokens.append(token) + self.restrictions.append(restrict) + return 1 + return 0 + + def token(self, i, restrict=None): + """ + Get the i'th token, and if i is one past the end, then scan + for another token; restrict is a list of tokens that + are allowed, or 0 for any token. + """ + tokens_len = len(self.tokens) + if i == tokens_len: # We are at the end, get the next... 
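+                # Tokens are produced lazily: one _scan() call consumes just
+                # enough input for the next token, honoring the current
+                # restriction set so only context-legal tokens can match.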
+ tokens_len += self._scan(restrict) + if i < tokens_len: + if restrict and self.restrictions[i] and restrict > self.restrictions[i]: + raise NotImplementedError("Unimplemented: restriction set changed") + return self.tokens[i] + raise NoMoreTokens + + def rewind(self, i): + tokens_len = len(self.tokens) + if i <= tokens_len: + token = self.tokens[i] + self.tokens = self.tokens[:i] + self.restrictions = self.restrictions[:i] + self.pos = token[0] diff --git a/scss/src/_speedups.c b/scss/src/_speedups.c index 460cac3..66c8d36 100644 --- a/scss/src/_speedups.c +++ b/scss/src/_speedups.c @@ -566,7 +566,7 @@ static PyMethodDef scss_methods[] = { static struct PyModuleDef speedups_module_def = { PyModuleDef_HEAD_INIT, - "_speedups", /* m_name */ + "_scanner", /* m_name */ NULL, /* m_doc */ (Py_ssize_t) -1, /* m_size */ scss_methods, /* m_methods */ @@ -586,12 +586,12 @@ static struct PyModuleDef speedups_module_def = { #define MOD_INIT(name) PyMODINIT_FUNC init##name(void) #endif -MOD_INIT(_speedups) +MOD_INIT(_scanner) { #if PY_MAJOR_VERSION >= 3 PyObject* m = PyModule_Create(&speedups_module_def); #else - PyObject* m = Py_InitModule("_speedups", scss_methods); + PyObject* m = Py_InitModule("_scanner", scss_methods); #endif scss_BlockLocatorType.tp_new = PyType_GenericNew; @@ -613,7 +613,7 @@ MOD_INIT(_speedups) Py_INCREF(&scss_ScannerType); PyModule_AddObject(m, "Scanner", (PyObject *)&scss_ScannerType); - PyExc_scss_NoMoreTokens = PyErr_NewException("_speedups.NoMoreTokens", NULL, NULL); + PyExc_scss_NoMoreTokens = PyErr_NewException("_scanner.NoMoreTokens", NULL, NULL); Py_INCREF(PyExc_scss_NoMoreTokens); PyModule_AddObject(m, "NoMoreTokens", (PyObject *)PyExc_scss_NoMoreTokens); #if PY_MAJOR_VERSION >= 3 diff --git a/scss/src/grammar/LICENSE b/scss/src/grammar/LICENSE deleted file mode 100644 index 64f38b8..0000000 --- a/scss/src/grammar/LICENSE +++ /dev/null @@ -1,18 +0,0 @@ -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be included -in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
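The scanner.py and _speedups.c hunks above complete the mechanism promised in the commit message: the C extension now builds as scss.grammar._scanner, and scss/grammar/scanner.py guards each class with try/except ImportError so the compiled versions are picked up automatically when present. A minimal sketch of what this means for callers, assuming the pure-Python constructor signature shown above (the C Scanner may differ in details; the WS/NUM pattern list and input here are purely illustrative):

    # Importing from the package transparently yields the C scanner when the
    # extension was compiled, or the pure-Python fallback otherwise.
    from scss.grammar import Scanner

    scanner = Scanner(
        [('WS', '[ \t]+'), ('NUM', '[0-9]+')],  # patterns: [(terminal, regex), ...]
        ['WS'],                                 # terminal names to ignore
        '1 2',                                  # input string
    )
    tok = scanner.token(0, set(['NUM']))        # -> (start, end, type, text)
    assert tok[2:] == ('NUM', '1')

No build flag or environment variable is consulted: the single import-time probe decides, in the same vein as Python 3's stdlib modules falling back from their _speedups counterparts.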
diff --git a/scss/src/grammar/README b/scss/src/grammar/README deleted file mode 100644 index 5a13dde..0000000 --- a/scss/src/grammar/README +++ /dev/null @@ -1,6 +0,0 @@ -To build the parser from the grammar do: - `python ./yapps2.py grammar.g` - -This will produce `grammar.py`; to that file modify the UNITS to be: '(?=" - token LT: "<" - token GT: ">" - token DOTDOTDOT: '[.]{3}' - token KWSTR: "'[^'\\\\]*(?:\\\\.[^'\\\\]*)*'(?=\s*:)" - token STR: "'[^'\\\\]*(?:\\\\.[^'\\\\]*)*'" - token KWQSTR: '"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"(?=\s*:)' - token QSTR: '"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"' - token UNITS: "(? 1 else v[0] }} - - - # Expressions: - rule expr_slst: - or_expr {{ v = [or_expr] }} - ( - or_expr {{ v.append(or_expr) }} - )* {{ return ListLiteral(v, comma=False) if len(v) > 1 else v[0] }} - - rule or_expr: - and_expr {{ v = and_expr }} - ( - OR and_expr {{ v = AnyOp(v, and_expr) }} - )* {{ return v }} - - rule and_expr: - not_expr {{ v = not_expr }} - ( - AND not_expr {{ v = AllOp(v, not_expr) }} - )* {{ return v }} - - rule not_expr: - comparison {{ return comparison }} - | NOT not_expr {{ return NotOp(not_expr) }} - - rule comparison: - a_expr {{ v = a_expr }} - ( - LT a_expr {{ v = BinaryOp(operator.lt, v, a_expr) }} - | GT a_expr {{ v = BinaryOp(operator.gt, v, a_expr) }} - | LE a_expr {{ v = BinaryOp(operator.le, v, a_expr) }} - | GE a_expr {{ v = BinaryOp(operator.ge, v, a_expr) }} - | EQ a_expr {{ v = BinaryOp(operator.eq, v, a_expr) }} - | NE a_expr {{ v = BinaryOp(operator.ne, v, a_expr) }} - )* {{ return v }} - - rule a_expr: - m_expr {{ v = m_expr }} - ( - ADD m_expr {{ v = BinaryOp(operator.add, v, m_expr) }} - | SUB m_expr {{ v = BinaryOp(operator.sub, v, m_expr) }} - )* {{ return v }} - - rule m_expr: - u_expr {{ v = u_expr }} - ( - MUL u_expr {{ v = BinaryOp(operator.mul, v, u_expr) }} - | DIV u_expr {{ v = BinaryOp(operator.truediv, v, u_expr) }} - )* {{ return v }} - - rule u_expr: - SIGN u_expr {{ return UnaryOp(operator.neg, u_expr) }} - | ADD u_expr {{ return UnaryOp(operator.pos, u_expr) }} - | atom {{ return atom }} - - rule atom: - LPAR ( - {{ v = ListLiteral([], comma=False) }} - | expr_map {{ v = expr_map }} - | expr_lst {{ v = expr_lst }} - ) RPAR {{ return Parentheses(v) }} - # Special functions. Note that these technically overlap with the - # regular function rule, which makes this not quite LL -- but they're - # different tokens so yapps can't tell, and it resolves the conflict by - # picking the first one. 
- | "url" LPAR - ( - URL {{ quotes = None }} - | "\"" URL "\"" {{ quotes = '"' }} - | "'" URL "'" {{ quotes = "'" }} - ) - RPAR {{ return Literal(Url(URL, quotes=quotes)) }} - | FNCT LPAR argspec RPAR {{ return CallOp(FNCT, argspec) }} - | BANG_IMPORTANT {{ return Literal(String(BANG_IMPORTANT, quotes=None)) }} - | ID {{ return Literal.from_bareword(ID) }} - | NUM {{ UNITS = None }} - [ UNITS ] {{ return Literal(Number(float(NUM), unit=UNITS)) }} - | STR {{ return Literal(String(dequote(STR), quotes="'")) }} - | QSTR {{ return Literal(String(dequote(QSTR), quotes='"')) }} - | COLOR {{ return Literal(Color.from_hex(COLOR, literal=True)) }} - | VAR {{ return Variable(VAR) }} - - rule kwatom: - # nothing - | KWID {{ return Literal.from_bareword(KWID) }} - | KWNUM {{ UNITS = None }} - [ UNITS ] {{ return Literal(Number(float(KWNUM), unit=UNITS)) }} - | KWSTR {{ return Literal(String(dequote(KWSTR), quotes="'")) }} - | KWQSTR {{ return Literal(String(dequote(KWQSTR), quotes='"')) }} - | KWCOLOR {{ return Literal(Color.from_hex(KWCOLOR, literal=True)) }} - | KWVAR {{ return Variable(KWVAR) }} - -%% diff --git a/scss/src/grammar/yapps2.py b/scss/src/grammar/yapps2.py deleted file mode 100755 index 1cea3d3..0000000 --- a/scss/src/grammar/yapps2.py +++ /dev/null @@ -1,896 +0,0 @@ -#!/usr/bin/env python - -# Yapps 3.0 - yet another python parser system -# Amit J Patel, January 1999 -# German M. Bravo, December 2011 -# See http://theory.stanford.edu/~amitp/Yapps/ for documentation and updates - -# v3.0.0 changes (December 2011) -# * PEP 8 cleanups -# * Optimizations in the scanning (added cache and cleanup() for it) -# v2.0.1 changes (October 2001): -# * The exceptions inherit the standard Exception class (thanks Rich Salz) -# * The scanner can use either a different set of regular expressions -# per instance, or allows the subclass to define class fields with -# the patterns. This improves performance when many Scanner objects -# are being created, because the regular expressions don't have to -# be recompiled each time. (thanks Amaury Forgeot d'Arc) -# v2.0.2 changes (April 2002) -# * Bug fix: generating the 'else' clause when the comment was too -# long. v2.0.1 was missing a newline. (thanks Steven Engelhardt) -# v2.0.3 changes (August 2002) -# * Bug fix: inline tokens using the r"" syntax. -# v.2.0.4 changes (July 2003) -# * Style change: Replaced `expr` with repr(expr) -# * Style change: Changed (b >= a and b < c) into (a <= b < c) -# * Bug fix: identifiers in grammar rules that had digits in them were -# not accessible in the {{python code}} section -# * Bug fix: made the SyntaxError exception class call -# Exception.__init__ (thanks Alex Verstak) -# * Style change: replaced raise "string exception" with raise -# ClassException(...) (thanks Alex Verstak) - -from yappsrt import * -import sys -import re - - -INDENT = " " * 4 - - -class Generator: - def __init__(self, name, options, tokens, rules): - self.change_count = 0 - self.name = name - self.options = options - self.preparser = '' - self.postparser = None - - self.tokens = {} # Map from tokens to regexps - self.sets = {} # Map for restriction sets - self.ignore = [] # List of token names to ignore in parsing - self.terminals = [] # List of token names (to maintain ordering) - - for n, t in tokens: - if n == '#ignore': - n = t - self.ignore.append(n) - if n in self.tokens.keys() and self.tokens[n] != t: - if n not in self.ignore: - print 'Warning: token', n, 'multiply defined.' 
- else: - self.terminals.append(n) - self.tokens[n] = t - - self.rules = {} # Map from rule names to parser nodes - self.params = {} # Map from rule names to parameters - self.goals = [] # List of rule names (to maintain ordering) - for n, p, r in rules: - self.params[n] = p - self.rules[n] = r - self.goals.append(n) - - self.output = sys.stdout - - def __getitem__(self, name): - # Get options - return self.options.get(name, 0) - - def non_ignored_tokens(self): - return filter(lambda x, i=self.ignore: x not in i, self.terminals) - - def changed(self): - self.change_count = 1 + self.change_count - - def subset(self, a, b): - "See if all elements of a are inside b" - for x in a: - if x not in b: - return 0 - return 1 - - def equal_set(self, a, b): - "See if a and b have the same elements" - if len(a) != len(b): - return 0 - if a == b: - return 1 - return self.subset(a, b) and self.subset(b, a) - - def add_to(self, parent, additions): - "Modify parent to include all elements in additions" - for x in additions: - if x not in parent: - parent.append(x) - self.changed() - - def equate(self, a, b): - self.add_to(a, b) - self.add_to(b, a) - - def write(self, *args): - for a in args: - self.output.write(a) - - def in_test(self, r, x, full, b): - if not b: - return '0' - if len(b) == 1: - return '%s == %s' % (x, repr(b[0])) - if full and len(b) > len(full) / 2: - # Reverse the sense of the test. - not_b = filter(lambda x, b=b: - x not in b, full) - return self.not_in_test(r, x, full, not_b) - n = None - for k, v in self.sets.items(): - if v == b: - n = k - if n is None: - n = '%s_chks' % r - while n in self.sets: - n += '_' - self.sets[n] = b - b_set = 'self.%s' % n - return '%s in %s' % (x, b_set) - - def not_in_test(self, r, x, full, b): - if not b: - return '1' - if len(b) == 1: - return '%s != %s' % (x, repr(b[0])) - n = None - for k, v in self.sets.items(): - if v == b: - n = k - if n is None: - n = '%s_chks' % r - while n in self.sets: - n += '_' - self.sets[n] = b - b_set = 'self.%s' % n - return '%s not in %s' % (x, b_set) - - def peek_call(self, r, a): - n = None - for k, v in self.sets.items(): - if v == a: - n = k - if n is None: - n = '%s_rsts' % r - while n in self.sets: - n += '_' - self.sets[n] = a - a_set = 'self.%s' % n - if self.equal_set(a, self.non_ignored_tokens()): - a_set = '' - if self['context-insensitive-scanner']: - a_set = '' - return 'self._peek(%s)' % a_set - - def peek_test(self, r, a, b): - if self.subset(a, b): - return '1' - if self['context-insensitive-scanner']: - a = self.non_ignored_tokens() - return self.in_test(r, self.peek_call(r, a), a, b) - - def not_peek_test(self, r, a, b): - if self.subset(a, b): - return '0' - return self.not_in_test(r, self.peek_call(r, a), a, b) - - def calculate(self): - while 1: - for r in self.goals: - self.rules[r].setup(self, r) - if self.change_count == 0: - break - self.change_count = 0 - - while 1: - for r in self.goals: - self.rules[r].update(self) - if self.change_count == 0: - break - self.change_count = 0 - - def dump_information(self): - self.calculate() - for r in self.goals: - print ' _____' + '_' * len(r) - print ('___/Rule ' + r + '\\' + '_' * 80)[:79] - queue = [self.rules[r]] - while queue: - top = queue[0] - del queue[0] - - print repr(top) - top.first.sort() - top.follow.sort() - eps = [] - if top.accepts_epsilon: - eps = ['(null)'] - print ' FIRST:', join(top.first + eps, ', ') - print ' FOLLOW:', join(top.follow, ', ') - for x in top.get_children(): - queue.append(x) - - def generate_output(self): - - 
self.calculate() - self.write(self.preparser) - self.write("class ", self.name, "Scanner(Scanner):\n") - self.write(" patterns = None\n") - self.write(" _patterns = [\n") - for p in self.terminals: - self.write(" (%s, %s),\n" % ( - repr(p), repr(self.tokens[p]))) - self.write(" ]\n\n") - self.write(" def __init__(self, input=None):\n") - self.write(" if hasattr(self, 'setup_patterns'):\n") - self.write(" self.setup_patterns(self._patterns)\n") - self.write(" elif self.patterns is None:\n") - self.write(" self.__class__.patterns = []\n") - self.write(" for t, p in self._patterns:\n") - self.write(" self.patterns.append((t, re.compile(p)))\n") - self.write(" super(", self.name, "Scanner, self).__init__(None, %s, input)\n" % - repr(self.ignore)) - self.write("\n\n") - - self.write("class ", self.name, "(Parser):\n") - for r in self.goals: - self.write(INDENT, "def ", r, "(self") - if self.params[r]: - self.write(", ", self.params[r]) - self.write("):\n") - self.rules[r].output(self, INDENT + INDENT) - self.write("\n") - - for n, s in self.sets.items(): - self.write(" %s = %s\n" % (n, set(s))) - - if self.postparser is not None: - self.write(self.postparser) - else: - self.write("\n") - self.write("P = ", self.name, "(", self.name, "Scanner())\n") - self.write("def parse(rule, text, *args):\n") - self.write(" P.reset(text)\n") - self.write(" return wrap_error_reporter(P, rule, *args)\n") - self.write("\n") - - self.write("if __name__ == '__main__':\n") - self.write(INDENT, "from sys import argv, stdin\n") - self.write(INDENT, "if len(argv) >= 2:\n") - self.write(INDENT * 2, "if len(argv) >= 3:\n") - self.write(INDENT * 3, "f = open(argv[2],'r')\n") - self.write(INDENT * 2, "else:\n") - self.write(INDENT * 3, "f = stdin\n") - self.write(INDENT * 2, "print parse(argv[1], f.read())\n") - self.write(INDENT, "else: print 'Args: []'\n") - - -###################################################################### - - -class Node: - def __init__(self): - self.first = [] - self.follow = [] - self.accepts_epsilon = 0 - self.rule = '?' - - def setup(self, gen, rule): - # Setup will change accepts_epsilon, - # sometimes from 0 to 1 but never 1 to 0. 
- # It will take a finite number of steps to set things up - self.rule = rule - - def used(self, vars): - "Return two lists: one of vars used, and the other of vars assigned" - return vars, [] - - def get_children(self): - "Return a list of sub-nodes" - return [] - - def __repr__(self): - return str(self) - - def update(self, gen): - if self.accepts_epsilon: - gen.add_to(self.first, self.follow) - - def output(self, gen, indent): - "Write out code to _gen_ with _indent_:string indentation" - gen.write(indent, "assert 0 # Invalid parser node\n") - - -class Terminal(Node): - def __init__(self, token): - Node.__init__(self) - self.token = token - self.accepts_epsilon = 0 - - def __str__(self): - return self.token - - def update(self, gen): - Node.update(self, gen) - if self.first != [self.token]: - self.first = [self.token] - gen.changed() - - def output(self, gen, indent): - gen.write(indent) - if re.match('[a-zA-Z_][a-zA-Z_0-9]*$', self.token): - gen.write(self.token, " = ") - gen.write("self._scan(%s)\n" % repr(self.token)) - - -class Eval(Node): - def __init__(self, expr): - Node.__init__(self) - self.expr = expr - - def setup(self, gen, rule): - Node.setup(self, gen, rule) - if not self.accepts_epsilon: - self.accepts_epsilon = 1 - gen.changed() - - def __str__(self): - return '{{ %s }}' % self.expr.strip() - - def output(self, gen, indent): - gen.write(indent, self.expr.strip(), '\n') - - -class NonTerminal(Node): - def __init__(self, name, args): - Node.__init__(self) - self.name = name - self.args = args - - def setup(self, gen, rule): - Node.setup(self, gen, rule) - try: - self.target = gen.rules[self.name] - if self.accepts_epsilon != self.target.accepts_epsilon: - self.accepts_epsilon = self.target.accepts_epsilon - gen.changed() - except KeyError: # Oops, it's nonexistent - print 'Error: no rule <%s>' % self.name - self.target = self - - def __str__(self): - return '<%s>' % self.name - - def update(self, gen): - Node.update(self, gen) - gen.equate(self.first, self.target.first) - gen.equate(self.follow, self.target.follow) - - def output(self, gen, indent): - gen.write(indent) - gen.write(self.name, " = ") - gen.write("self.", self.name, "(", self.args, ")\n") - - -class Sequence(Node): - def __init__(self, *children): - Node.__init__(self) - self.children = children - - def setup(self, gen, rule): - Node.setup(self, gen, rule) - for c in self.children: - c.setup(gen, rule) - - if not self.accepts_epsilon: - # If it's not already accepting epsilon, it might now do so. 
- for c in self.children: - # any non-epsilon means all is non-epsilon - if not c.accepts_epsilon: - break - else: - self.accepts_epsilon = 1 - gen.changed() - - def get_children(self): - return self.children - - def __str__(self): - return '( %s )' % join(map(lambda x: str(x), self.children)) - - def update(self, gen): - Node.update(self, gen) - for g in self.children: - g.update(gen) - - empty = 1 - for g_i in range(len(self.children)): - g = self.children[g_i] - - if empty: - gen.add_to(self.first, g.first) - if not g.accepts_epsilon: - empty = 0 - - if g_i == len(self.children) - 1: - next = self.follow - else: - next = self.children[1 + g_i].first - gen.add_to(g.follow, next) - - if self.children: - gen.add_to(self.follow, self.children[-1].follow) - - def output(self, gen, indent): - if self.children: - for c in self.children: - c.output(gen, indent) - else: - # Placeholder for empty sequences, just in case - gen.write(indent, 'pass\n') - -class Choice(Node): - def __init__(self, *children): - Node.__init__(self) - self.children = children - - def setup(self, gen, rule): - Node.setup(self, gen, rule) - for c in self.children: - c.setup(gen, rule) - - if not self.accepts_epsilon: - for c in self.children: - if c.accepts_epsilon: - self.accepts_epsilon = 1 - gen.changed() - - def get_children(self): - return self.children - - def __str__(self): - return '( %s )' % join(map(lambda x: str(x), self.children), ' | ') - - def update(self, gen): - Node.update(self, gen) - for g in self.children: - g.update(gen) - - for g in self.children: - gen.add_to(self.first, g.first) - gen.add_to(self.follow, g.follow) - for g in self.children: - gen.add_to(g.follow, self.follow) - if self.accepts_epsilon: - gen.add_to(self.first, self.follow) - - def output(self, gen, indent): - test = "if" - gen.write(indent, "_token_ = ", gen.peek_call(self.rule, self.first), "\n") - tokens_seen = [] - tokens_unseen = self.first[:] - if gen['context-insensitive-scanner']: - # Context insensitive scanners can return ANY token, - # not only the ones in first. - tokens_unseen = gen.non_ignored_tokens() - for c in self.children: - testset = c.first[:] - removed = [] - for x in testset: - if x in tokens_seen: - testset.remove(x) - removed.append(x) - if x in tokens_unseen: - tokens_unseen.remove(x) - tokens_seen = tokens_seen + testset - if removed: - if not testset: - print 'Error in rule', self.rule + ':', c, 'never matches.' - else: - print 'Warning:', self - print ' * These tokens are being ignored:', join(removed, ', ') - print ' due to previous choices using them.' - - if testset: - if not tokens_unseen: # context sensitive scanners only! 
- if test == 'if': - # if it's the first AND last test, then - # we can simply put the code without an if/else - c.output(gen, indent) - else: - gen.write(indent, "else:") - t = gen.in_test(self.rule, '', [], testset) - if len(t) < 70 - len(indent): - gen.write(" #", t) - gen.write("\n") - c.output(gen, indent + INDENT) - else: - gen.write(indent, test, " ", - gen.in_test(self.rule, '_token_', tokens_unseen, testset), - ":\n") - c.output(gen, indent + INDENT) - test = "elif" - - if gen['context-insensitive-scanner'] and tokens_unseen: - gen.write(indent, "else:\n") - gen.write(indent, INDENT, "raise SyntaxError(self._pos, ") - gen.write("'Could not match ", self.rule, "')\n") - - -class Wrapper(Node): - def __init__(self, child): - Node.__init__(self) - self.child = child - - def setup(self, gen, rule): - Node.setup(self, gen, rule) - self.child.setup(gen, rule) - - def get_children(self): - return [self.child] - - def update(self, gen): - Node.update(self, gen) - self.child.update(gen) - gen.add_to(self.first, self.child.first) - gen.equate(self.follow, self.child.follow) - - -class Option(Wrapper): - def setup(self, gen, rule): - Wrapper.setup(self, gen, rule) - if not self.accepts_epsilon: - self.accepts_epsilon = 1 - gen.changed() - - def __str__(self): - return '[ %s ]' % str(self.child) - - def output(self, gen, indent): - if self.child.accepts_epsilon: - print 'Warning in rule', self.rule + ': contents may be empty.' - gen.write(indent, "if %s:\n" % - gen.peek_test(self.rule, self.first, self.child.first)) - self.child.output(gen, indent + INDENT) - - -class Plus(Wrapper): - def setup(self, gen, rule): - Wrapper.setup(self, gen, rule) - if self.accepts_epsilon != self.child.accepts_epsilon: - self.accepts_epsilon = self.child.accepts_epsilon - gen.changed() - - def __str__(self): - return '%s+' % str(self.child) - - def update(self, gen): - Wrapper.update(self, gen) - gen.add_to(self.follow, self.first) - - def output(self, gen, indent): - if self.child.accepts_epsilon: - print 'Warning in rule', self.rule + ':' - print ' * The repeated pattern could be empty. The resulting' - print ' parser may not work properly.' - gen.write(indent, "while 1:\n") - self.child.output(gen, indent + INDENT) - union = self.first[:] - gen.add_to(union, self.follow) - gen.write(indent + INDENT, "if %s:\n" % - gen.not_peek_test(self.rule, union, self.child.first)) - gen.write(indent + INDENT * 2, "break\n") - - -class Star(Plus): - def setup(self, gen, rule): - Wrapper.setup(self, gen, rule) - if not self.accepts_epsilon: - self.accepts_epsilon = 1 - gen.changed() - - def __str__(self): - return '%s*' % str(self.child) - - def output(self, gen, indent): - if self.child.accepts_epsilon: - print 'Warning in rule', self.rule + ':' - print ' * The repeated pattern could be empty. The resulting' - print ' parser probably will not work properly.' 
- gen.write(indent, "while %s:\n" % - gen.peek_test(self.rule, self.follow, self.child.first)) - self.child.output(gen, indent + INDENT) - -###################################################################### -# The remainder of this file is from parsedesc.{g,py} - - -def append(lst, x): - "Imperative append" - lst.append(x) - return lst - - -def add_inline_token(tokens, str): - tokens.insert(0, (str, eval(str, {}, {}))) - return Terminal(str) - - -def cleanup_choice(lst): - if len(lst) == 0: - return Sequence([]) - if len(lst) == 1: - return lst[0] - return apply(Choice, tuple(lst)) - - -def cleanup_sequence(lst): - if len(lst) == 1: - return lst[0] - return apply(Sequence, tuple(lst)) - - -def cleanup_rep(node, rep): - if rep == 'star': - return Star(node) - elif rep == 'plus': - return Plus(node) - else: - return node - - -def resolve_name(tokens, id, args): - if id in map(lambda x: x[0], tokens): - # It's a token - if args: - print 'Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args) - return Terminal(id) - else: - # It's a name, so assume it's a nonterminal - return NonTerminal(id, args) - - -from string import * -from yappsrt import * - - -class ParserDescriptionScanner(Scanner): - def __init__(self, str): - Scanner.__init__(self, [ - ('"rule"', 'rule'), - ('"ignore"', 'ignore'), - ('"token"', 'token'), - ('"option"', 'option'), - ('":"', ':'), - ('"parser"', 'parser'), - ('[ \011\015\012]+', '[ \011\015\012]+'), - ('#.*?\015?\012', '#.*?\015?\012'), - ('END', '$'), - ('ATTR', '<<.+?>>'), - ('STMT', '{{.+?}}'), - ('ID', '[a-zA-Z_][a-zA-Z_0-9]*'), - ('STR', '[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"'), - ('LP', '\\('), - ('RP', '\\)'), - ('LB', '\\['), - ('RB', '\\]'), - ('OR', '[|]'), - ('STAR', '[*]'), - ('PLUS', '[+]'), - ], ['[ \011\015\012]+', '#.*?\015?\012'], str) - - -class ParserDescription(Parser): - def Parser(self): - self._scan('"parser"') - ID = self._scan('ID') - self._scan('":"') - Options = self.Options() - Tokens = self.Tokens() - Rules = self.Rules(Tokens) - END = self._scan('END') - return Generator(ID, Options, Tokens, Rules) - - def Options(self): - opt = {} - while self._peek(set(['"option"', '"token"', '"ignore"', 'END', '"rule"'])) == '"option"': - self._scan('"option"') - self._scan('":"') - Str = self.Str() - opt[Str] = 1 - return opt - - def Tokens(self): - tok = [] - while self._peek(set(['"token"', '"ignore"', 'END', '"rule"'])) in ['"token"', '"ignore"']: - _token_ = self._peek(set(['"token"', '"ignore"'])) - if _token_ == '"token"': - self._scan('"token"') - ID = self._scan('ID') - self._scan('":"') - Str = self.Str() - tok.append((ID, Str)) - else: # == '"ignore"' - self._scan('"ignore"') - self._scan('":"') - Str = self.Str() - tok.append(('#ignore', Str)) - return tok - - def Rules(self, tokens): - rul = [] - while self._peek(set(['"rule"', 'END'])) == '"rule"': - self._scan('"rule"') - ID = self._scan('ID') - OptParam = self.OptParam() - self._scan('":"') - ClauseA = self.ClauseA(tokens) - rul.append((ID, OptParam, ClauseA)) - return rul - - def ClauseA(self, tokens): - ClauseB = self.ClauseB(tokens) - v = [ClauseB] - while self._peek(set(['OR', 'RP', 'RB', '"rule"', 'END'])) == 'OR': - OR = self._scan('OR') - ClauseB = self.ClauseB(tokens) - v.append(ClauseB) - return cleanup_choice(v) - - def ClauseB(self, tokens): - v = [] - while self._peek(set(['STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'END'])) in ['STR', 'ID', 'LP', 'LB', 'STMT']: - ClauseC = self.ClauseC(tokens) - v.append(ClauseC) - return 
cleanup_sequence(v) - - def ClauseC(self, tokens): - ClauseD = self.ClauseD(tokens) - _token_ = self._peek(set(['PLUS', 'STAR', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'END'])) - if _token_ == 'PLUS': - PLUS = self._scan('PLUS') - return Plus(ClauseD) - elif _token_ == 'STAR': - STAR = self._scan('STAR') - return Star(ClauseD) - else: - return ClauseD - - def ClauseD(self, tokens): - _token_ = self._peek(set(['STR', 'ID', 'LP', 'LB', 'STMT'])) - if _token_ == 'STR': - STR = self._scan('STR') - t = (STR, eval(STR, {}, {})) - if t not in tokens: - tokens.insert(0, t) - return Terminal(STR) - elif _token_ == 'ID': - ID = self._scan('ID') - OptParam = self.OptParam() - return resolve_name(tokens, ID, OptParam) - elif _token_ == 'LP': - LP = self._scan('LP') - ClauseA = self.ClauseA(tokens) - RP = self._scan('RP') - return ClauseA - elif _token_ == 'LB': - LB = self._scan('LB') - ClauseA = self.ClauseA(tokens) - RB = self._scan('RB') - return Option(ClauseA) - else: # == 'STMT' - STMT = self._scan('STMT') - return Eval(STMT[2:-2]) - - def OptParam(self): - if self._peek(set(['ATTR', '":"', 'PLUS', 'STAR', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'END'])) == 'ATTR': - ATTR = self._scan('ATTR') - return ATTR[2:-2] - return '' - - def Str(self): - STR = self._scan('STR') - return eval(STR, {}, {}) - - -# This replaces the default main routine - - -yapps_options = [ - ('context-insensitive-scanner', 'context-insensitive-scanner', - 'Scan all tokens (see docs)') - ] - - -def generate(inputfilename, outputfilename='', dump=0, **flags): - """Generate a grammar, given an input filename (X.g) - and an output filename (defaulting to X.py).""" - - if not outputfilename: - if inputfilename[-2:] == '.g': - outputfilename = inputfilename[:-2] + '.py' - else: - raise Exception("Missing output filename") - - print 'Input Grammar:', inputfilename - print 'Output File:', outputfilename - - DIVIDER = '\n%%\n' # This pattern separates the pre/post parsers - preparser, postparser = None, None # Code before and after the parser desc - - # Read the entire file - s = open(inputfilename, 'r').read() - - # See if there's a separation between the pre-parser and parser - f = find(s, DIVIDER) - if f >= 0: - preparser, s = s[:f] + '\n\n', s[f + len(DIVIDER):] - - # See if there's a separation between the parser and post-parser - f = find(s, DIVIDER) - if f >= 0: - s, postparser = s[:f], '\n\n' + s[f + len(DIVIDER):] - - # Create the parser and scanner - p = ParserDescription(ParserDescriptionScanner(s)) - if not p: - return - - # Now parse the file - t = wrap_error_reporter(p, 'Parser') - if not t: - return # Error - if preparser is not None: - t.preparser = preparser - if postparser is not None: - t.postparser = postparser - - # Check the options - for f in t.options.keys(): - for opt, _, _ in yapps_options: - if f == opt: - break - else: - print 'Warning: unrecognized option', f - # Add command line options to the set - for f in flags.keys(): - t.options[f] = flags[f] - - # Generate the output - if dump: - t.dump_information() - else: - t.output = open(outputfilename, 'w') - t.generate_output() - -if __name__ == '__main__': - import getopt - optlist, args = getopt.getopt(sys.argv[1:], 'f:', ['dump']) - if not args or len(args) > 2: - print 'Usage:' - print ' python', sys.argv[0], '[flags] input.g [output.py]' - print 'Flags:' - print (' --dump' + ' ' * 40)[:35] + 'Dump out grammar information' - for flag, _, doc in yapps_options: - print (' -f' + flag + ' ' * 40)[:35] + doc - else: 
- # Read in the options and create a list of flags - flags = {} - for opt in optlist: - for flag, name, _ in yapps_options: - if opt == ('-f', flag): - flags[name] = 1 - break - else: - if opt == ('--dump', ''): - flags['dump'] = 1 - else: - print 'Warning: unrecognized option', opt[0], opt[1] - - apply(generate, tuple(args), flags) diff --git a/scss/src/grammar/yappsrt.py b/scss/src/grammar/yappsrt.py deleted file mode 100644 index e7c18cd..0000000 --- a/scss/src/grammar/yappsrt.py +++ /dev/null @@ -1,275 +0,0 @@ -# Yapps 3.0 Runtime (by Kronuz) -# -# This module is needed to run generated parsers. - -import re - -try: - from _scss import Scanner, NoMoreTokens -except ImportError: - Scanner = None - -################################################################################ -# Parser - -if not Scanner: - class NoMoreTokens(Exception): - """ - Another exception object, for when we run out of tokens - """ - pass - - class Scanner(object): - def __init__(self, patterns, ignore, input=None): - """ - Patterns is [(terminal,regex)...] - Ignore is [terminal,...]; - Input is a string - """ - self.reset(input) - self.ignore = ignore - # The stored patterns are a pair (compiled regex,source - # regex). If the patterns variable passed in to the - # constructor is None, we assume that the class already has a - # proper .patterns list constructed - if patterns is not None: - self.patterns = [] - for k, r in patterns: - self.patterns.append((k, re.compile(r))) - - def reset(self, input): - self.tokens = [] - self.restrictions = [] - self.input = input - self.pos = 0 - - def __repr__(self): - """ - Print the last 10 tokens that have been scanned in - """ - output = '' - for t in self.tokens[-10:]: - output = "%s\n (@%s) %s = %s" % (output, t[0], t[2], repr(t[3])) - return output - - def _scan(self, restrict): - """ - Should scan another token and add it to the list, self.tokens, - and add the restriction to self.restrictions - """ - # Keep looking for a token, ignoring any in self.ignore - token = None - while True: - best_pat = None - # Search the patterns for a match, with earlier - # tokens in the list having preference - best_pat_len = 0 - for p, regexp in self.patterns: - # First check to see if we're restricting to this token - if restrict and p not in restrict and p not in self.ignore: - continue - m = regexp.match(self.input, self.pos) - if m: - # We got a match - best_pat = p - best_pat_len = len(m.group(0)) - break - - # If we didn't find anything, raise an error - if best_pat is None: - msg = "Bad Token" - if restrict: - msg = "Trying to find one of " + ", ".join(restrict) - raise SyntaxError("SyntaxError[@ char %s: %s]" % (repr(self.pos), msg)) - - # If we found something that isn't to be ignored, return it - if best_pat in self.ignore: - # This token should be ignored... - self.pos += best_pat_len - else: - end_pos = self.pos + best_pat_len - # Create a token with this data - token = ( - self.pos, - end_pos, - best_pat, - self.input[self.pos:end_pos] - ) - break - if token is not None: - self.pos = token[1] - # Only add this token if it's not in the list - # (to prevent looping) - if not self.tokens or token != self.tokens[-1]: - self.tokens.append(token) - self.restrictions.append(restrict) - return 1 - return 0 - - def token(self, i, restrict=None): - """ - Get the i'th token, and if i is one past the end, then scan - for another token; restrict is a list of tokens that - are allowed, or 0 for any token. 
- """ - tokens_len = len(self.tokens) - if i == tokens_len: # We are at the end, get the next... - tokens_len += self._scan(restrict) - if i < tokens_len: - if restrict and self.restrictions[i] and restrict > self.restrictions[i]: - raise NotImplementedError("Unimplemented: restriction set changed") - return self.tokens[i] - raise NoMoreTokens - - def rewind(self, i): - tokens_len = len(self.tokens) - if i <= tokens_len: - token = self.tokens[i] - self.tokens = self.tokens[:i] - self.restrictions = self.restrictions[:i] - self.pos = token[0] - - -class CachedScanner(Scanner): - """ - Same as Scanner, but keeps cached tokens for any given input - """ - _cache_ = {} - _goals_ = ['END'] - - @classmethod - def cleanup(cls): - cls._cache_ = {} - - def __init__(self, patterns, ignore, input=None): - try: - self._tokens = self._cache_[input] - except KeyError: - self._tokens = None - self.__tokens = {} - self.__input = input - super(CachedScanner, self).__init__(patterns, ignore, input) - - def reset(self, input): - try: - self._tokens = self._cache_[input] - except KeyError: - self._tokens = None - self.__tokens = {} - self.__input = input - super(CachedScanner, self).reset(input) - - def __repr__(self): - if self._tokens is None: - return super(CachedScanner, self).__repr__() - output = '' - for t in self._tokens[-10:]: - output = "%s\n (@%s) %s = %s" % (output, t[0], t[2], repr(t[3])) - return output - - def token(self, i, restrict=None): - if self._tokens is None: - token = super(CachedScanner, self).token(i, restrict) - self.__tokens[i] = token - if token[2] in self._goals_: # goal tokens - self._cache_[self.__input] = self._tokens = self.__tokens - return token - else: - token = self._tokens.get(i) - if token is None: - raise NoMoreTokens - return token - - def rewind(self, i): - if self._tokens is None: - super(CachedScanner, self).rewind(i) - - -class Parser(object): - def __init__(self, scanner): - self._scanner = scanner - self._pos = 0 - - def reset(self, input): - self._scanner.reset(input) - self._pos = 0 - - def _peek(self, types): - """ - Returns the token type for lookahead; if there are any args - then the list of args is the set of token types to allow - """ - tok = self._scanner.token(self._pos, types) - return tok[2] - - def _scan(self, type): - """ - Returns the matched text, and moves to the next token - """ - tok = self._scanner.token(self._pos, set([type])) - if tok[2] != type: - raise SyntaxError("SyntaxError[@ char %s: %s]" % (repr(tok[0]), "Trying to find " + type)) - self._pos += 1 - return tok[3] - - def _rewind(self, n=1): - self._pos -= min(n, self._pos) - self._scanner.rewind(self._pos) - - -################################################################################ - - -def print_error(input, err, scanner): - """This is a really dumb long function to print error messages nicely.""" - p = err.pos - # Figure out the line number - line = input[:p].count('\n') - print err.msg + " on line " + repr(line + 1) + ":" - # Now try printing part of the line - text = input[max(p - 80, 0): - p + 80] - p = p - max(p - 80, 0) - - # Strip to the left - i = text[:p].rfind('\n') - j = text[:p].rfind('\r') - if i < 0 or (0 <= j < i): - i = j - if 0 <= i < p: - p = p - i - 1 - text = text[i + 1:] - - # Strip to the right - i = text.find('\n', p) - j = text.find('\r', p) - if i < 0 or (0 <= j < i): - i = j - if i >= 0: - text = text[:i] - - # Now shorten the text - while len(text) > 70 and p > 60: - # Cut off 10 chars - text = "..." 
+ text[10:] - p = p - 7 - - # Now print the string, along with an indicator - print '> ', text - print '> ', ' ' * p + '^' - print 'List of nearby tokens:', scanner - - -def wrap_error_reporter(parser, rule, *args): - try: - return getattr(parser, rule)(*args) - except SyntaxError, s: - input = parser._scanner.input - try: - print_error(input, s, parser._scanner) - raise - except ImportError: - print "Syntax Error %s on line %d" % (s.msg, input[:s.pos].count('\n') + 1) - except NoMoreTokens: - print "Could not complete parsing; stopped around here:" - print parser._scanner diff --git a/setup.py b/setup.py index 78534b9..56b95e9 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ speedups = Feature( # NOTE: header files are included by MANIFEST.in; Extension does not # include headers in an sdist (since they're typically in /usr/lib) Extension( - 'scss._speedups', + 'scss.grammar._scanner', sources=['scss/src/_speedups.c', 'scss/src/block_locator.c', 'scss/src/scanner.c'], libraries=['pcre'] ), diff --git a/yapps2.py b/yapps2.py new file mode 100755 index 0000000..e94a7b2 --- /dev/null +++ b/yapps2.py @@ -0,0 +1,1178 @@ +#!/usr/bin/env python +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +# Yapps 3.0 - yet another python parser system +# Amit J Patel, January 1999 +# German M. Bravo, December 2011 +# See http://theory.stanford.edu/~amitp/Yapps/ for documentation and updates + +# v3.0.0 changes (December 2011) +# * PEP 8 cleanups +# * Optimizations in the scanning (added cache and cleanup() for it) +# v2.0.1 changes (October 2001): +# * The exceptions inherit the standard Exception class (thanks Rich Salz) +# * The scanner can use either a different set of regular expressions +# per instance, or allows the subclass to define class fields with +# the patterns. This improves performance when many Scanner objects +# are being created, because the regular expressions don't have to +# be recompiled each time. (thanks Amaury Forgeot d'Arc) +# v2.0.2 changes (April 2002) +# * Bug fix: generating the 'else' clause when the comment was too +# long. v2.0.1 was missing a newline. (thanks Steven Engelhardt) +# v2.0.3 changes (August 2002) +# * Bug fix: inline tokens using the r"" syntax. 
+# v.2.0.4 changes (July 2003) +# * Style change: Replaced `expr` with repr(expr) +# * Style change: Changed (b >= a and b < c) into (a <= b < c) +# * Bug fix: identifiers in grammar rules that had digits in them were +# not accessible in the {{python code}} section +# * Bug fix: made the SyntaxError exception class call +# Exception.__init__ (thanks Alex Verstak) +# * Style change: replaced raise "string exception" with raise +# ClassException(...) (thanks Alex Verstak) + +from string import find +from string import join +import sys +import re + + +INDENT = " " * 4 + + +class Generator: + def __init__(self, name, options, tokens, rules): + self.change_count = 0 + self.name = name + self.options = options + self.preparser = '' + self.postparser = None + + self.tokens = {} # Map from tokens to regexps + self.sets = {} # Map for restriction sets + self.ignore = [] # List of token names to ignore in parsing + self.terminals = [] # List of token names (to maintain ordering) + + for n, t in tokens: + if n == '#ignore': + n = t + self.ignore.append(n) + if n in self.tokens.keys() and self.tokens[n] != t: + if n not in self.ignore: + print 'Warning: token', n, 'multiply defined.' + else: + self.terminals.append(n) + self.tokens[n] = t + + self.rules = {} # Map from rule names to parser nodes + self.params = {} # Map from rule names to parameters + self.goals = [] # List of rule names (to maintain ordering) + for n, p, r in rules: + self.params[n] = p + self.rules[n] = r + self.goals.append(n) + + self.output = sys.stdout + + def __getitem__(self, name): + # Get options + return self.options.get(name, 0) + + def non_ignored_tokens(self): + return filter(lambda x, i=self.ignore: x not in i, self.terminals) + + def changed(self): + self.change_count = 1 + self.change_count + + def subset(self, a, b): + "See if all elements of a are inside b" + for x in a: + if x not in b: + return 0 + return 1 + + def equal_set(self, a, b): + "See if a and b have the same elements" + if len(a) != len(b): + return 0 + if a == b: + return 1 + return self.subset(a, b) and self.subset(b, a) + + def add_to(self, parent, additions): + "Modify parent to include all elements in additions" + for x in additions: + if x not in parent: + parent.append(x) + self.changed() + + def equate(self, a, b): + self.add_to(a, b) + self.add_to(b, a) + + def write(self, *args): + for a in args: + self.output.write(a) + + def in_test(self, r, x, full, b): + if not b: + return '0' + if len(b) == 1: + return '%s == %s' % (x, repr(b[0])) + if full and len(b) > len(full) / 2: + # Reverse the sense of the test. 
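+            # (When b covers more than half of the full token set, its
+            # complement is smaller, so emit a "not in" test over that
+            # complement instead of a long "in" test.)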
+ not_b = filter(lambda x, b=b: + x not in b, full) + return self.not_in_test(r, x, full, not_b) + n = None + for k, v in self.sets.items(): + if v == b: + n = k + if n is None: + n = '%s_chks' % r + while n in self.sets: + n += '_' + self.sets[n] = b + b_set = 'self.%s' % n + return '%s in %s' % (x, b_set) + + def not_in_test(self, r, x, full, b): + if not b: + return '1' + if len(b) == 1: + return '%s != %s' % (x, repr(b[0])) + n = None + for k, v in self.sets.items(): + if v == b: + n = k + if n is None: + n = '%s_chks' % r + while n in self.sets: + n += '_' + self.sets[n] = b + b_set = 'self.%s' % n + return '%s not in %s' % (x, b_set) + + def peek_call(self, r, a): + n = None + for k, v in self.sets.items(): + if v == a: + n = k + if n is None: + n = '%s_rsts' % r + while n in self.sets: + n += '_' + self.sets[n] = a + a_set = 'self.%s' % n + if self.equal_set(a, self.non_ignored_tokens()): + a_set = '' + if self['context-insensitive-scanner']: + a_set = '' + return 'self._peek(%s)' % a_set + + def peek_test(self, r, a, b): + if self.subset(a, b): + return '1' + if self['context-insensitive-scanner']: + a = self.non_ignored_tokens() + return self.in_test(r, self.peek_call(r, a), a, b) + + def not_peek_test(self, r, a, b): + if self.subset(a, b): + return '0' + return self.not_in_test(r, self.peek_call(r, a), a, b) + + def calculate(self): + while 1: + for r in self.goals: + self.rules[r].setup(self, r) + if self.change_count == 0: + break + self.change_count = 0 + + while 1: + for r in self.goals: + self.rules[r].update(self) + if self.change_count == 0: + break + self.change_count = 0 + + def dump_information(self): + self.calculate() + for r in self.goals: + print ' _____' + '_' * len(r) + print ('___/Rule ' + r + '\\' + '_' * 80)[:79] + queue = [self.rules[r]] + while queue: + top = queue[0] + del queue[0] + + print repr(top) + top.first.sort() + top.follow.sort() + eps = [] + if top.accepts_epsilon: + eps = ['(null)'] + print ' FIRST:', join(top.first + eps, ', ') + print ' FOLLOW:', join(top.follow, ', ') + for x in top.get_children(): + queue.append(x) + + def generate_output(self): + + self.calculate() + self.write(self.preparser) + self.write("class ", self.name, "Scanner(Scanner):\n") + self.write(" patterns = None\n") + self.write(" _patterns = [\n") + for p in self.terminals: + self.write(" (%s, %s),\n" % ( + repr(p), repr(self.tokens[p]))) + self.write(" ]\n\n") + self.write(" def __init__(self, input=None):\n") + self.write(" if hasattr(self, 'setup_patterns'):\n") + self.write(" self.setup_patterns(self._patterns)\n") + self.write(" elif self.patterns is None:\n") + self.write(" self.__class__.patterns = []\n") + self.write(" for t, p in self._patterns:\n") + self.write(" self.patterns.append((t, re.compile(p)))\n") + self.write(" super(", self.name, "Scanner, self).__init__(None, %s, input)\n" % + repr(self.ignore)) + self.write("\n\n") + + self.write("class ", self.name, "(Parser):\n") + for r in self.goals: + self.write(INDENT, "def ", r, "(self") + if self.params[r]: + self.write(", ", self.params[r]) + self.write("):\n") + self.rules[r].output(self, INDENT + INDENT) + self.write("\n") + + for n, s in self.sets.items(): + self.write(" %s = %s\n" % (n, set(s))) + + if self.postparser is not None: + self.write(self.postparser) + else: + self.write("\n") + self.write("P = ", self.name, "(", self.name, "Scanner())\n") + self.write("def parse(rule, text, *args):\n") + self.write(" P.reset(text)\n") + self.write(" return wrap_error_reporter(P, rule, *args)\n") + 
self.write("\n") + + self.write("if __name__ == '__main__':\n") + self.write(INDENT, "from sys import argv, stdin\n") + self.write(INDENT, "if len(argv) >= 2:\n") + self.write(INDENT * 2, "if len(argv) >= 3:\n") + self.write(INDENT * 3, "f = open(argv[2],'r')\n") + self.write(INDENT * 2, "else:\n") + self.write(INDENT * 3, "f = stdin\n") + self.write(INDENT * 2, "print parse(argv[1], f.read())\n") + self.write(INDENT, "else: print 'Args: []'\n") + + +###################################################################### + + +class Node: + def __init__(self): + self.first = [] + self.follow = [] + self.accepts_epsilon = 0 + self.rule = '?' + + def setup(self, gen, rule): + # Setup will change accepts_epsilon, + # sometimes from 0 to 1 but never 1 to 0. + # It will take a finite number of steps to set things up + self.rule = rule + + def used(self, vars): + "Return two lists: one of vars used, and the other of vars assigned" + return vars, [] + + def get_children(self): + "Return a list of sub-nodes" + return [] + + def __repr__(self): + return str(self) + + def update(self, gen): + if self.accepts_epsilon: + gen.add_to(self.first, self.follow) + + def output(self, gen, indent): + "Write out code to _gen_ with _indent_:string indentation" + gen.write(indent, "assert 0 # Invalid parser node\n") + + +class Terminal(Node): + def __init__(self, token): + Node.__init__(self) + self.token = token + self.accepts_epsilon = 0 + + def __str__(self): + return self.token + + def update(self, gen): + Node.update(self, gen) + if self.first != [self.token]: + self.first = [self.token] + gen.changed() + + def output(self, gen, indent): + gen.write(indent) + if re.match('[a-zA-Z_][a-zA-Z_0-9]*$', self.token): + gen.write(self.token, " = ") + gen.write("self._scan(%s)\n" % repr(self.token)) + + +class Eval(Node): + def __init__(self, expr): + Node.__init__(self) + self.expr = expr + + def setup(self, gen, rule): + Node.setup(self, gen, rule) + if not self.accepts_epsilon: + self.accepts_epsilon = 1 + gen.changed() + + def __str__(self): + return '{{ %s }}' % self.expr.strip() + + def output(self, gen, indent): + gen.write(indent, self.expr.strip(), '\n') + + +class NonTerminal(Node): + def __init__(self, name, args): + Node.__init__(self) + self.name = name + self.args = args + + def setup(self, gen, rule): + Node.setup(self, gen, rule) + try: + self.target = gen.rules[self.name] + if self.accepts_epsilon != self.target.accepts_epsilon: + self.accepts_epsilon = self.target.accepts_epsilon + gen.changed() + except KeyError: # Oops, it's nonexistent + print 'Error: no rule <%s>' % self.name + self.target = self + + def __str__(self): + return '<%s>' % self.name + + def update(self, gen): + Node.update(self, gen) + gen.equate(self.first, self.target.first) + gen.equate(self.follow, self.target.follow) + + def output(self, gen, indent): + gen.write(indent) + gen.write(self.name, " = ") + gen.write("self.", self.name, "(", self.args, ")\n") + + +class Sequence(Node): + def __init__(self, *children): + Node.__init__(self) + self.children = children + + def setup(self, gen, rule): + Node.setup(self, gen, rule) + for c in self.children: + c.setup(gen, rule) + + if not self.accepts_epsilon: + # If it's not already accepting epsilon, it might now do so. 
+ for c in self.children: + # any non-epsilon means all is non-epsilon + if not c.accepts_epsilon: + break + else: + self.accepts_epsilon = 1 + gen.changed() + + def get_children(self): + return self.children + + def __str__(self): + return '( %s )' % join(map(lambda x: str(x), self.children)) + + def update(self, gen): + Node.update(self, gen) + for g in self.children: + g.update(gen) + + empty = 1 + for g_i in range(len(self.children)): + g = self.children[g_i] + + if empty: + gen.add_to(self.first, g.first) + if not g.accepts_epsilon: + empty = 0 + + if g_i == len(self.children) - 1: + next = self.follow + else: + next = self.children[1 + g_i].first + gen.add_to(g.follow, next) + + if self.children: + gen.add_to(self.follow, self.children[-1].follow) + + def output(self, gen, indent): + if self.children: + for c in self.children: + c.output(gen, indent) + else: + # Placeholder for empty sequences, just in case + gen.write(indent, 'pass\n') + +class Choice(Node): + def __init__(self, *children): + Node.__init__(self) + self.children = children + + def setup(self, gen, rule): + Node.setup(self, gen, rule) + for c in self.children: + c.setup(gen, rule) + + if not self.accepts_epsilon: + for c in self.children: + if c.accepts_epsilon: + self.accepts_epsilon = 1 + gen.changed() + + def get_children(self): + return self.children + + def __str__(self): + return '( %s )' % join(map(lambda x: str(x), self.children), ' | ') + + def update(self, gen): + Node.update(self, gen) + for g in self.children: + g.update(gen) + + for g in self.children: + gen.add_to(self.first, g.first) + gen.add_to(self.follow, g.follow) + for g in self.children: + gen.add_to(g.follow, self.follow) + if self.accepts_epsilon: + gen.add_to(self.first, self.follow) + + def output(self, gen, indent): + test = "if" + gen.write(indent, "_token_ = ", gen.peek_call(self.rule, self.first), "\n") + tokens_seen = [] + tokens_unseen = self.first[:] + if gen['context-insensitive-scanner']: + # Context insensitive scanners can return ANY token, + # not only the ones in first. + tokens_unseen = gen.non_ignored_tokens() + for c in self.children: + testset = c.first[:] + removed = [] + for x in testset: + if x in tokens_seen: + testset.remove(x) + removed.append(x) + if x in tokens_unseen: + tokens_unseen.remove(x) + tokens_seen = tokens_seen + testset + if removed: + if not testset: + print 'Error in rule', self.rule + ':', c, 'never matches.' + else: + print 'Warning:', self + print ' * These tokens are being ignored:', join(removed, ', ') + print ' due to previous choices using them.' + + if testset: + if not tokens_unseen: # context sensitive scanners only! 
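+                    # Every token this choice can see is already claimed
+                    # by some alternative, so the final branch needs no
+                    # guard test.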
+ if test == 'if': + # if it's the first AND last test, then + # we can simply put the code without an if/else + c.output(gen, indent) + else: + gen.write(indent, "else:") + t = gen.in_test(self.rule, '', [], testset) + if len(t) < 70 - len(indent): + gen.write(" #", t) + gen.write("\n") + c.output(gen, indent + INDENT) + else: + gen.write(indent, test, " ", + gen.in_test(self.rule, '_token_', tokens_unseen, testset), + ":\n") + c.output(gen, indent + INDENT) + test = "elif" + + if gen['context-insensitive-scanner'] and tokens_unseen: + gen.write(indent, "else:\n") + gen.write(indent, INDENT, "raise SyntaxError(self._pos, ") + gen.write("'Could not match ", self.rule, "')\n") + + +class Wrapper(Node): + def __init__(self, child): + Node.__init__(self) + self.child = child + + def setup(self, gen, rule): + Node.setup(self, gen, rule) + self.child.setup(gen, rule) + + def get_children(self): + return [self.child] + + def update(self, gen): + Node.update(self, gen) + self.child.update(gen) + gen.add_to(self.first, self.child.first) + gen.equate(self.follow, self.child.follow) + + +class Option(Wrapper): + def setup(self, gen, rule): + Wrapper.setup(self, gen, rule) + if not self.accepts_epsilon: + self.accepts_epsilon = 1 + gen.changed() + + def __str__(self): + return '[ %s ]' % str(self.child) + + def output(self, gen, indent): + if self.child.accepts_epsilon: + print 'Warning in rule', self.rule + ': contents may be empty.' + gen.write(indent, "if %s:\n" % + gen.peek_test(self.rule, self.first, self.child.first)) + self.child.output(gen, indent + INDENT) + + +class Plus(Wrapper): + def setup(self, gen, rule): + Wrapper.setup(self, gen, rule) + if self.accepts_epsilon != self.child.accepts_epsilon: + self.accepts_epsilon = self.child.accepts_epsilon + gen.changed() + + def __str__(self): + return '%s+' % str(self.child) + + def update(self, gen): + Wrapper.update(self, gen) + gen.add_to(self.follow, self.first) + + def output(self, gen, indent): + if self.child.accepts_epsilon: + print 'Warning in rule', self.rule + ':' + print ' * The repeated pattern could be empty. The resulting' + print ' parser may not work properly.' + gen.write(indent, "while 1:\n") + self.child.output(gen, indent + INDENT) + union = self.first[:] + gen.add_to(union, self.follow) + gen.write(indent + INDENT, "if %s:\n" % + gen.not_peek_test(self.rule, union, self.child.first)) + gen.write(indent + INDENT * 2, "break\n") + + +class Star(Plus): + def setup(self, gen, rule): + Wrapper.setup(self, gen, rule) + if not self.accepts_epsilon: + self.accepts_epsilon = 1 + gen.changed() + + def __str__(self): + return '%s*' % str(self.child) + + def output(self, gen, indent): + if self.child.accepts_epsilon: + print 'Warning in rule', self.rule + ':' + print ' * The repeated pattern could be empty. The resulting' + print ' parser probably will not work properly.' 
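+        # Zero-or-more: keep looping while the lookahead token can
+        # start the repeated child.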
+ gen.write(indent, "while %s:\n" % + gen.peek_test(self.rule, self.follow, self.child.first)) + self.child.output(gen, indent + INDENT) + +###################################################################### +# The remainder of this file is from parsedesc.{g,py} + + +def append(lst, x): + "Imperative append" + lst.append(x) + return lst + + +def add_inline_token(tokens, str): + tokens.insert(0, (str, eval(str, {}, {}))) + return Terminal(str) + + +def cleanup_choice(lst): + if len(lst) == 0: + return Sequence([]) + if len(lst) == 1: + return lst[0] + return apply(Choice, tuple(lst)) + + +def cleanup_sequence(lst): + if len(lst) == 1: + return lst[0] + return apply(Sequence, tuple(lst)) + + +def cleanup_rep(node, rep): + if rep == 'star': + return Star(node) + elif rep == 'plus': + return Plus(node) + else: + return node + + +def resolve_name(tokens, id, args): + if id in map(lambda x: x[0], tokens): + # It's a token + if args: + print 'Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args) + return Terminal(id) + else: + # It's a name, so assume it's a nonterminal + return NonTerminal(id, args) + + +################################################################################ +# Contents of yappsrt follow. + +# Parser + +class NoMoreTokens(Exception): + """ + Another exception object, for when we run out of tokens + """ + pass + +class Scanner(object): + def __init__(self, patterns, ignore, input=None): + """ + Patterns is [(terminal,regex)...] + Ignore is [terminal,...]; + Input is a string + """ + self.reset(input) + self.ignore = ignore + # The stored patterns are a pair (compiled regex,source + # regex). If the patterns variable passed in to the + # constructor is None, we assume that the class already has a + # proper .patterns list constructed + if patterns is not None: + self.patterns = [] + for k, r in patterns: + self.patterns.append((k, re.compile(r))) + + def reset(self, input): + self.tokens = [] + self.restrictions = [] + self.input = input + self.pos = 0 + + def __repr__(self): + """ + Print the last 10 tokens that have been scanned in + """ + output = '' + for t in self.tokens[-10:]: + output = "%s\n (@%s) %s = %s" % (output, t[0], t[2], repr(t[3])) + return output + + def _scan(self, restrict): + """ + Should scan another token and add it to the list, self.tokens, + and add the restriction to self.restrictions + """ + # Keep looking for a token, ignoring any in self.ignore + token = None + while True: + best_pat = None + # Search the patterns for a match, with earlier + # tokens in the list having preference + best_pat_len = 0 + for p, regexp in self.patterns: + # First check to see if we're restricting to this token + if restrict and p not in restrict and p not in self.ignore: + continue + m = regexp.match(self.input, self.pos) + if m: + # We got a match + best_pat = p + best_pat_len = len(m.group(0)) + break + + # If we didn't find anything, raise an error + if best_pat is None: + msg = "Bad Token" + if restrict: + msg = "Trying to find one of " + ", ".join(restrict) + raise SyntaxError("SyntaxError[@ char %s: %s]" % (repr(self.pos), msg)) + + # If we found something that isn't to be ignored, return it + if best_pat in self.ignore: + # This token should be ignored... 
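+                    # ...so just advance past it and keep scanning.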
+ self.pos += best_pat_len + else: + end_pos = self.pos + best_pat_len + # Create a token with this data + token = ( + self.pos, + end_pos, + best_pat, + self.input[self.pos:end_pos] + ) + break + if token is not None: + self.pos = token[1] + # Only add this token if it's not in the list + # (to prevent looping) + if not self.tokens or token != self.tokens[-1]: + self.tokens.append(token) + self.restrictions.append(restrict) + return 1 + return 0 + + def token(self, i, restrict=None): + """ + Get the i'th token, and if i is one past the end, then scan + for another token; restrict is a list of tokens that + are allowed, or 0 for any token. + """ + tokens_len = len(self.tokens) + if i == tokens_len: # We are at the end, get the next... + tokens_len += self._scan(restrict) + if i < tokens_len: + if restrict and self.restrictions[i] and restrict > self.restrictions[i]: + raise NotImplementedError("Unimplemented: restriction set changed") + return self.tokens[i] + raise NoMoreTokens + + def rewind(self, i): + tokens_len = len(self.tokens) + if i <= tokens_len: + token = self.tokens[i] + self.tokens = self.tokens[:i] + self.restrictions = self.restrictions[:i] + self.pos = token[0] + + +class CachedScanner(Scanner): + """ + Same as Scanner, but keeps cached tokens for any given input + """ + _cache_ = {} + _goals_ = ['END'] + + @classmethod + def cleanup(cls): + cls._cache_ = {} + + def __init__(self, patterns, ignore, input=None): + try: + self._tokens = self._cache_[input] + except KeyError: + self._tokens = None + self.__tokens = {} + self.__input = input + super(CachedScanner, self).__init__(patterns, ignore, input) + + def reset(self, input): + try: + self._tokens = self._cache_[input] + except KeyError: + self._tokens = None + self.__tokens = {} + self.__input = input + super(CachedScanner, self).reset(input) + + def __repr__(self): + if self._tokens is None: + return super(CachedScanner, self).__repr__() + output = '' + for t in self._tokens[-10:]: + output = "%s\n (@%s) %s = %s" % (output, t[0], t[2], repr(t[3])) + return output + + def token(self, i, restrict=None): + if self._tokens is None: + token = super(CachedScanner, self).token(i, restrict) + self.__tokens[i] = token + if token[2] in self._goals_: # goal tokens + self._cache_[self.__input] = self._tokens = self.__tokens + return token + else: + token = self._tokens.get(i) + if token is None: + raise NoMoreTokens + return token + + def rewind(self, i): + if self._tokens is None: + super(CachedScanner, self).rewind(i) + + +class Parser(object): + def __init__(self, scanner): + self._scanner = scanner + self._pos = 0 + + def reset(self, input): + self._scanner.reset(input) + self._pos = 0 + + def _peek(self, types): + """ + Returns the token type for lookahead; if there are any args + then the list of args is the set of token types to allow + """ + tok = self._scanner.token(self._pos, types) + return tok[2] + + def _scan(self, type): + """ + Returns the matched text, and moves to the next token + """ + tok = self._scanner.token(self._pos, set([type])) + if tok[2] != type: + raise SyntaxError("SyntaxError[@ char %s: %s]" % (repr(tok[0]), "Trying to find " + type)) + self._pos += 1 + return tok[3] + + def _rewind(self, n=1): + self._pos -= min(n, self._pos) + self._scanner.rewind(self._pos) + + +def print_error(input, err, scanner): + """This is a really dumb long function to print error messages nicely.""" + p = err.pos + # Figure out the line number + line = input[:p].count('\n') + print err.msg + " on line " + repr(line + 1) + ":" 
+ # Now try printing part of the line + text = input[max(p - 80, 0): + p + 80] + p = p - max(p - 80, 0) + + # Strip to the left + i = text[:p].rfind('\n') + j = text[:p].rfind('\r') + if i < 0 or (0 <= j < i): + i = j + if 0 <= i < p: + p = p - i - 1 + text = text[i + 1:] + + # Strip to the right + i = text.find('\n', p) + j = text.find('\r', p) + if i < 0 or (0 <= j < i): + i = j + if i >= 0: + text = text[:i] + + # Now shorten the text + while len(text) > 70 and p > 60: + # Cut off 10 chars + text = "..." + text[10:] + p = p - 7 + + # Now print the string, along with an indicator + print '> ', text + print '> ', ' ' * p + '^' + print 'List of nearby tokens:', scanner + + +def wrap_error_reporter(parser, rule, *args): + try: + return getattr(parser, rule)(*args) + except SyntaxError, s: + input = parser._scanner.input + try: + print_error(input, s, parser._scanner) + raise + except ImportError: + print "Syntax Error %s on line %d" % (s.msg, input[:s.pos].count('\n') + 1) + except NoMoreTokens: + print "Could not complete parsing; stopped around here:" + print parser._scanner + +# End yappsrt +################################################################################ + + +class ParserDescriptionScanner(Scanner): + def __init__(self, str): + Scanner.__init__(self, [ + ('"rule"', 'rule'), + ('"ignore"', 'ignore'), + ('"token"', 'token'), + ('"option"', 'option'), + ('":"', ':'), + ('"parser"', 'parser'), + ('[ \011\015\012]+', '[ \011\015\012]+'), + ('#.*?\015?\012', '#.*?\015?\012'), + ('END', '$'), + ('ATTR', '<<.+?>>'), + ('STMT', '{{.+?}}'), + ('ID', '[a-zA-Z_][a-zA-Z_0-9]*'), + ('STR', '[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"'), + ('LP', '\\('), + ('RP', '\\)'), + ('LB', '\\['), + ('RB', '\\]'), + ('OR', '[|]'), + ('STAR', '[*]'), + ('PLUS', '[+]'), + ], ['[ \011\015\012]+', '#.*?\015?\012'], str) + + +class ParserDescription(Parser): + def Parser(self): + self._scan('"parser"') + ID = self._scan('ID') + self._scan('":"') + Options = self.Options() + Tokens = self.Tokens() + Rules = self.Rules(Tokens) + END = self._scan('END') + return Generator(ID, Options, Tokens, Rules) + + def Options(self): + opt = {} + while self._peek(set(['"option"', '"token"', '"ignore"', 'END', '"rule"'])) == '"option"': + self._scan('"option"') + self._scan('":"') + Str = self.Str() + opt[Str] = 1 + return opt + + def Tokens(self): + tok = [] + while self._peek(set(['"token"', '"ignore"', 'END', '"rule"'])) in ['"token"', '"ignore"']: + _token_ = self._peek(set(['"token"', '"ignore"'])) + if _token_ == '"token"': + self._scan('"token"') + ID = self._scan('ID') + self._scan('":"') + Str = self.Str() + tok.append((ID, Str)) + else: # == '"ignore"' + self._scan('"ignore"') + self._scan('":"') + Str = self.Str() + tok.append(('#ignore', Str)) + return tok + + def Rules(self, tokens): + rul = [] + while self._peek(set(['"rule"', 'END'])) == '"rule"': + self._scan('"rule"') + ID = self._scan('ID') + OptParam = self.OptParam() + self._scan('":"') + ClauseA = self.ClauseA(tokens) + rul.append((ID, OptParam, ClauseA)) + return rul + + def ClauseA(self, tokens): + ClauseB = self.ClauseB(tokens) + v = [ClauseB] + while self._peek(set(['OR', 'RP', 'RB', '"rule"', 'END'])) == 'OR': + OR = self._scan('OR') + ClauseB = self.ClauseB(tokens) + v.append(ClauseB) + return cleanup_choice(v) + + def ClauseB(self, tokens): + v = [] + while self._peek(set(['STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'END'])) in ['STR', 'ID', 'LP', 'LB', 'STMT']: + ClauseC = self.ClauseC(tokens) + 
v.append(ClauseC) + return cleanup_sequence(v) + + def ClauseC(self, tokens): + ClauseD = self.ClauseD(tokens) + _token_ = self._peek(set(['PLUS', 'STAR', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'END'])) + if _token_ == 'PLUS': + PLUS = self._scan('PLUS') + return Plus(ClauseD) + elif _token_ == 'STAR': + STAR = self._scan('STAR') + return Star(ClauseD) + else: + return ClauseD + + def ClauseD(self, tokens): + _token_ = self._peek(set(['STR', 'ID', 'LP', 'LB', 'STMT'])) + if _token_ == 'STR': + STR = self._scan('STR') + t = (STR, eval(STR, {}, {})) + if t not in tokens: + tokens.insert(0, t) + return Terminal(STR) + elif _token_ == 'ID': + ID = self._scan('ID') + OptParam = self.OptParam() + return resolve_name(tokens, ID, OptParam) + elif _token_ == 'LP': + LP = self._scan('LP') + ClauseA = self.ClauseA(tokens) + RP = self._scan('RP') + return ClauseA + elif _token_ == 'LB': + LB = self._scan('LB') + ClauseA = self.ClauseA(tokens) + RB = self._scan('RB') + return Option(ClauseA) + else: # == 'STMT' + STMT = self._scan('STMT') + return Eval(STMT[2:-2]) + + def OptParam(self): + if self._peek(set(['ATTR', '":"', 'PLUS', 'STAR', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'END'])) == 'ATTR': + ATTR = self._scan('ATTR') + return ATTR[2:-2] + return '' + + def Str(self): + STR = self._scan('STR') + return eval(STR, {}, {}) + + +# This replaces the default main routine + + +yapps_options = [ + ('context-insensitive-scanner', 'context-insensitive-scanner', + 'Scan all tokens (see docs)') + ] + + +def generate(inputfilename, outputfilename='', dump=0, **flags): + """Generate a grammar, given an input filename (X.g) + and an output filename (defaulting to X.py).""" + + if not outputfilename: + if inputfilename[-2:] == '.g': + outputfilename = inputfilename[:-2] + '.py' + else: + raise Exception("Missing output filename") + + print 'Input Grammar:', inputfilename + print 'Output File:', outputfilename + + DIVIDER = '\n%%\n' # This pattern separates the pre/post parsers + preparser, postparser = None, None # Code before and after the parser desc + + # Read the entire file + s = open(inputfilename, 'r').read() + + # See if there's a separation between the pre-parser and parser + f = find(s, DIVIDER) + if f >= 0: + preparser, s = s[:f] + '\n\n', s[f + len(DIVIDER):] + + # See if there's a separation between the parser and post-parser + f = find(s, DIVIDER) + if f >= 0: + s, postparser = s[:f], '\n\n' + s[f + len(DIVIDER):] + + # Create the parser and scanner + p = ParserDescription(ParserDescriptionScanner(s)) + if not p: + return + + # Now parse the file + t = wrap_error_reporter(p, 'Parser') + if not t: + return # Error + if preparser is not None: + t.preparser = preparser + if postparser is not None: + t.postparser = postparser + + # Check the options + for f in t.options.keys(): + for opt, _, _ in yapps_options: + if f == opt: + break + else: + print 'Warning: unrecognized option', f + # Add command line options to the set + for f in flags.keys(): + t.options[f] = flags[f] + + # Generate the output + if dump: + t.dump_information() + else: + t.output = open(outputfilename, 'w') + t.generate_output() + +if __name__ == '__main__': + import getopt + optlist, args = getopt.getopt(sys.argv[1:], 'f:', ['dump']) + if not args or len(args) > 2: + print 'Usage:' + print ' python', sys.argv[0], '[flags] input.g [output.py]' + print 'Flags:' + print (' --dump' + ' ' * 40)[:35] + 'Dump out grammar information' + for flag, _, doc in yapps_options: + print (' -f' + flag + ' 
' * 40)[:35] + doc
+    else:
+        # Read in the options and create a list of flags
+        flags = {}
+        for opt in optlist:
+            for flag, name, _ in yapps_options:
+                if opt == ('-f', flag):
+                    flags[name] = 1
+                    break
+            else:
+                if opt == ('--dump', ''):
+                    flags['dump'] = 1
+                else:
+                    print 'Warning: unrecognized option', opt[0], opt[1]
+
+    apply(generate, tuple(args), flags)