diff options
author | Eevee (Alex Munroe) <eevee.git@veekun.com> | 2014-08-29 15:36:06 -0700 |
---|---|---|
committer | Eevee (Alex Munroe) <eevee.git@veekun.com> | 2014-08-29 17:07:28 -0700 |
commit | 227ea22bb12c976aa6a59bf74084e181dee436f4 (patch) | |
tree | ef39587cebdb912cf3f6bd093f992c74cf456926 | |
parent | 45d3a91713e7a26050121f300d3ea2b93c7908d1 (diff) | |
download | pyscss-227ea22bb12c976aa6a59bf74084e181dee436f4.tar.gz |
Several vast improvements to string parsing.
- Interpolation is now understood by the parser! It works for
  barewords, quoted strings, and both forms of URL.
- Escaped characters are now understood and translated by the parser as
  well!
- Rendering strings to CSS attempts to escape them correctly, regardless
  of how they were put together.
- The block locator (at least the Python version) is now a little more
  aware of CSS escaping.
Unfortunately there are some problems with the C module in this commit,
which I will be promptly fixing.
Conflicts:
    scss/blockast.py
-rw-r--r-- | scss/ast.py | 60 |
-rw-r--r-- | scss/cssdefs.py | 27 |
-rw-r--r-- | scss/errors.py | 2 |
-rw-r--r-- | scss/expression.py | 11 |
-rw-r--r-- | scss/grammar/expression.g | 107 |
-rw-r--r-- | scss/grammar/expression.py | 179 |
-rw-r--r-- | scss/grammar/scanner.py | 24 |
-rw-r--r-- | scss/src/block_locator.c | 1 |
-rw-r--r-- | scss/tests/files/general/interpolation-acid.css | 10 |
-rw-r--r-- | scss/tests/files/general/interpolation-acid.scss | 15 |
-rw-r--r-- | scss/types.py | 75 |
-rw-r--r-- | scss/util.py | 1 |
12 files changed, 431 insertions, 81 deletions
diff --git a/scss/ast.py b/scss/ast.py index 1da3805..b1bc77d 100644 --- a/scss/ast.py +++ b/scss/ast.py @@ -207,6 +207,66 @@ class CallOp(Expression): quotes=None) +# TODO this class should delegate the unescaping to the type, rather than +# burying it in the parser +class Interpolation(Expression): + """A string that may contain any number of interpolations: + + foo#{...}bar#{...}baz + """ + def __init__(self, parts, quotes=None, type=String): + self.parts = parts + self.quotes = quotes + self.type = type + + @classmethod + def maybe(cls, parts, quotes=None, type=String): + """Returns an interpolation if there are multiple parts, otherwise a + plain Literal. This keeps the AST somewhat simpler, but also is the + only way `Literal.from_bareword` gets called. + """ + if len(parts) > 1: + return cls(parts, quotes=quotes, type=type) + + if quotes is None and type is String: + return Literal.from_bareword(parts[0]) + + return Literal(type(parts[0], quotes=quotes)) + + def _render_interpolated(self, value): + """Return the result of interpolating `value`, which is slightly + different than just rendering it, since it's an intermediate thing. 
+ """ + # Strings are taken literally + if isinstance(value, String): + return value.value + + # Lists are joined recursively + if isinstance(value, List): + # TODO Ruby /immediately/ respects `compress` here -- need to + # inspect the compilation for whether to pass it in (probably in + # other places too) + return value.delimiter().join( + self._render_interpolated(item) for item in value) + else: + # TODO like here + return value.render() + + def evaluate(self, calculator, divide=False): + result = [] + for i, part in enumerate(self.parts): + if i % 2 == 0: + # First part and other odd parts are literal string + result.append(part) + else: + # Interspersed (even) parts are nodes + value = part.evaluate(calculator, divide) + result.append(self._render_interpolated(value)) + + return self.type(''.join(result), quotes=self.quotes) + + + class Literal(Expression): def __repr__(self): return '<%s(%s)>' % (self.__class__.__name__, repr(self.value)) diff --git a/scss/cssdefs.py b/scss/cssdefs.py index f250b09..f6b2078 100644 --- a/scss/cssdefs.py +++ b/scss/cssdefs.py @@ -425,9 +425,32 @@ def determine_encoding(buf): return encoding - # ------------------------------------------------------------------------------ -# Bits and pieces of grammar, as regexen +# Bits and pieces of grammar, mostly as regexen + +# CSS escape sequences are either a backslash followed by a single character, +# or a backslash followed by one to six hex digits and a single optional +# whitespace. Escaped newlines become nothing. +# Ref: http://dev.w3.org/csswg/css-syntax-3/#consume-an-escaped-code-point +unescape_rx = re.compile( + r"\\(.)|\\([0-9a-fA-F]{1,6})[\n\t ]?|\\\n", re.DOTALL) + + +def _unescape_one(match): + if match.group(1) is not None: + return match.group(1) + elif match.group(2) is not None: + return six.chr(int(match.group(2), 16)) + else: + return six.text_type() + + +def unescape(string): + """Given a raw CSS string (i.e. 
taken directly from CSS source with no + processing), eliminate all backslash escapes. + """ + return unescape_rx.sub(_unescape_one, string) + _expr_glob_re = re.compile(r''' \#\{(.*?)\} # Global Interpolation only diff --git a/scss/errors.py b/scss/errors.py index b1e7f12..8be3019 100644 --- a/scss/errors.py +++ b/scss/errors.py @@ -213,7 +213,7 @@ class SassParseError(SassError): def format_prefix(self): decorated_expr, line = add_error_marker(self.expression, self.expression_pos or -1) - return """Error parsing expression:\n{0}\n""".format(decorated_expr) + return """Error parsing expression at {1}:\n{0}\n""".format(decorated_expr, self.expression_pos) class SassEvaluationError(SassError): diff --git a/scss/expression.py b/scss/expression.py index 54001f6..9de95ac 100644 --- a/scss/expression.py +++ b/scss/expression.py @@ -8,6 +8,7 @@ from warnings import warn import six +from scss.ast import Literal from scss.cssdefs import _expr_glob_re, _interpolate_re from scss.errors import SassError, SassEvaluationError, SassParseError from scss.grammar.expression import SassExpression, SassExpressionScanner @@ -164,4 +165,14 @@ class Calculator(object): self.ast_cache[key] = ast return ast + def parse_interpolations(self, string): + """Parse a string for interpolations, but don't treat anything else as + Sass syntax. Returns an AST node. + """ + # Shortcut: if there are no #s in the string in the first place, it + # must not have any interpolations, right? 
+ if '#' not in string: + return Literal(String.unquoted(string)) + return self.parse_expression(string, 'goal_interpolated_anything') + __all__ = ('Calculator',) diff --git a/scss/grammar/expression.g b/scss/grammar/expression.g index 9cc833d..525db2e 100644 --- a/scss/grammar/expression.g +++ b/scss/grammar/expression.g @@ -14,11 +14,13 @@ from scss.ast import AnyOp from scss.ast import AllOp from scss.ast import NotOp from scss.ast import CallOp -from scss.ast import Variable +from scss.ast import Interpolation from scss.ast import Literal +from scss.ast import Variable from scss.ast import ListLiteral from scss.ast import MapLiteral from scss.ast import ArgspecLiteral +from scss.cssdefs import unescape from scss.types import Color from scss.types import Number from scss.types import String @@ -51,6 +53,11 @@ parser SassExpression: token LT: "<" token GT: ">" token DOTDOTDOT: '[.]{3}' + token SINGLE_QUOTE: "'" + token DOUBLE_QUOTE: '"' + # Don't allow quotes or # unless they're escaped (or the # is alone) + token SINGLE_STRING_GUTS: '([^\'\\\\#]|[\\\\].|#(?![{]))*' + token DOUBLE_STRING_GUTS: "([^\"\\\\#]|[\\\\].|#(?![{]))*" token KWSTR: "'[^'\\\\]*(?:\\\\.[^'\\\\]*)*'(?=\s*:)" token STR: "'[^'\\\\]*(?:\\\\.[^'\\\\]*)*'" token KWQSTR: '"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"(?=\s*:)' @@ -65,13 +72,19 @@ parser SassExpression: token VAR: "\$[-a-zA-Z0-9_]+" token FNCT: "[-a-zA-Z_][-a-zA-Z0-9_]*(?=\()" token KWID: "[-a-zA-Z_][-a-zA-Z0-9_]*(?=\s*:)" - token ID: "[-a-zA-Z_][-a-zA-Z0-9_]*" + # TODO Ruby is a bit more flexible here, for example allowing 1#{2}px + token BAREWORD: "[-a-zA-Z_][-a-zA-Z0-9_]*" token BANG_IMPORTANT: "!important" + + token INTERP_START: "#[{]" + token INTERP_END: "[}]" + token INTERP_ANYTHING: "([^#]|#(?![{]))*" # http://dev.w3.org/csswg/css-syntax-3/#consume-a-url-token0 - # URLs may not contain quotes, parentheses, or unprintables + # Bare URLs may not contain quotes, parentheses, or unprintables. 
Quoted + # URLs may, of course, contain whatever they like. # TODO reify escapes, for this and for strings # FIXME: Also, URLs may not contain $ as it breaks urls with variables? - token URL: "(?:[\\\\].|[^$'\"()\\x00-\\x08\\x0b\\x0e-\\x1f\\x7f])*" + token BAREURL: "(?:[\\\\].|[^#$'\"()\\x00-\\x08\\x0b\\x0e-\\x1f\\x7f]|#(?![{]))*" # Goals: rule goal: expr_lst END {{ return expr_lst }} @@ -188,23 +201,19 @@ parser SassExpression: # regular function rule, which makes this not quite LL -- but they're # different tokens so yapps can't tell, and it resolves the conflict by # picking the first one. - | "url" LPAR - ( - URL {{ quotes = None }} - | "\"" URL "\"" {{ quotes = '"' }} - | "'" URL "'" {{ quotes = "'" }} - ) - RPAR {{ return Literal(Url(URL, quotes=quotes)) }} + | "url" LPAR interpolated_url RPAR + {{ print("url!"); return interpolated_url }} | FNCT LPAR argspec RPAR {{ return CallOp(FNCT, argspec) }} | BANG_IMPORTANT {{ return Literal(String(BANG_IMPORTANT, quotes=None)) }} - | ID {{ return Literal.from_bareword(ID) }} + | interpolated_bareword {{ return Interpolation.maybe(interpolated_bareword) }} | NUM {{ UNITS = None }} [ UNITS ] {{ return Literal(Number(float(NUM), unit=UNITS)) }} - | STR {{ return Literal(String(dequote(STR), quotes="'")) }} - | QSTR {{ return Literal(String(dequote(QSTR), quotes='"')) }} + | interpolated_string {{ return interpolated_string }} | COLOR {{ return Literal(Color.from_hex(COLOR, literal=True)) }} | VAR {{ return Variable(VAR) }} + # TODO none of these things respect interpolation -- would love to not need + # to repeat all the rules rule kwatom: # nothing | KWID {{ return Literal.from_bareword(KWID) }} @@ -215,4 +224,74 @@ parser SassExpression: | KWCOLOR {{ return Literal(Color.from_hex(KWCOLOR, literal=True)) }} | KWVAR {{ return Variable(KWVAR) }} + # ------------------------------------------------------------------------- + # Interpolation, which is a right mess, because it depends very heavily on + # context -- what 
other characters are allowed, and when do we stop? + # Thankfully these rules all look pretty similar: there's a delimiter, a + # literal, and some number of interpolations and trailing literals. + rule interpolation: + INTERP_START + expr_lst + INTERP_END {{ return expr_lst }} + + rule interpolated_url: + # Note: This rule DOES NOT include the url(...) delimiters + interpolated_bare_url + {{ return Interpolation.maybe(interpolated_bare_url, type=Url, quotes=None) }} + | interpolated_string_single + {{ return Interpolation.maybe(interpolated_string_single, type=Url, quotes="'") }} + | interpolated_string_double + {{ return Interpolation.maybe(interpolated_string_double, type=Url, quotes='"') }} + + rule interpolated_bare_url: + BAREURL {{ parts = [unescape(BAREURL)] }} + ( + interpolation {{ parts.append(interpolation) }} + BAREURL {{ parts.append(unescape(BAREURL)) }} + )* {{ return parts }} + + rule interpolated_string: + interpolated_string_single + {{ return Interpolation.maybe(interpolated_string_single, quotes="'") }} + | interpolated_string_double + {{ return Interpolation.maybe(interpolated_string_double, quotes='"') }} + + rule interpolated_string_single: + SINGLE_QUOTE + SINGLE_STRING_GUTS {{ parts = [unescape(SINGLE_STRING_GUTS)] }} + ( + interpolation {{ parts.append(interpolation) }} + SINGLE_STRING_GUTS {{ parts.append(unescape(SINGLE_STRING_GUTS)) }} + )* + SINGLE_QUOTE {{ return parts }} + + rule interpolated_string_double: + DOUBLE_QUOTE + DOUBLE_STRING_GUTS {{ parts = [unescape(DOUBLE_STRING_GUTS)] }} + ( + interpolation {{ parts.append(interpolation) }} + DOUBLE_STRING_GUTS {{ parts.append(unescape(DOUBLE_STRING_GUTS)) }} + )* + DOUBLE_QUOTE {{ return parts }} + + rule interpolated_bareword: + # Again, a bareword has a fairly limited set of allowed characters + BAREWORD {{ parts = [unescape(BAREWORD)] }} + ( + interpolation {{ parts.append(interpolation) }} + BAREWORD {{ parts.append(unescape(BAREWORD)) }} + )* {{ return parts }} + + + rule 
goal_interpolated_anything: + # This isn't part of the grammar, but rather a separate goal, used for + # text that might contain interpolations but should not be parsed + # outside of them -- e.g., selector strings. + INTERP_ANYTHING {{ parts = [INTERP_ANYTHING] }} + ( + interpolation {{ parts.append(interpolation) }} + INTERP_ANYTHING {{ parts.append(INTERP_ANYTHING) }} + )* + END {{ return Interpolation.maybe(parts) }} + %% diff --git a/scss/grammar/expression.py b/scss/grammar/expression.py index aed08b9..7a5f8e6 100644 --- a/scss/grammar/expression.py +++ b/scss/grammar/expression.py @@ -14,11 +14,13 @@ from scss.ast import AnyOp from scss.ast import AllOp from scss.ast import NotOp from scss.ast import CallOp -from scss.ast import Variable +from scss.ast import Interpolation from scss.ast import Literal +from scss.ast import Variable from scss.ast import ListLiteral from scss.ast import MapLiteral from scss.ast import ArgspecLiteral +from scss.cssdefs import unescape from scss.types import Color from scss.types import Number from scss.types import String @@ -33,8 +35,6 @@ from scss.grammar import Scanner class SassExpressionScanner(Scanner): patterns = None _patterns = [ - ('"\'"', "'"), - ('"\\""', '"'), ('"url"', 'url'), ('":"', ':'), ('","', ','), @@ -58,6 +58,10 @@ class SassExpressionScanner(Scanner): ('LT', '<'), ('GT', '>'), ('DOTDOTDOT', '[.]{3}'), + ('SINGLE_QUOTE', "'"), + ('DOUBLE_QUOTE', '"'), + ('SINGLE_STRING_GUTS', "([^'\\\\#]|[\\\\].|#(?![{]))*"), + ('DOUBLE_STRING_GUTS', '([^"\\\\#]|[\\\\].|#(?![{]))*'), ('KWSTR', "'[^'\\\\]*(?:\\\\.[^'\\\\]*)*'(?=\\s*:)"), ('STR', "'[^'\\\\]*(?:\\\\.[^'\\\\]*)*'"), ('KWQSTR', '"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"(?=\\s*:)'), @@ -72,9 +76,12 @@ class SassExpressionScanner(Scanner): ('VAR', '\\$[-a-zA-Z0-9_]+'), ('FNCT', '[-a-zA-Z_][-a-zA-Z0-9_]*(?=\\()'), ('KWID', '[-a-zA-Z_][-a-zA-Z0-9_]*(?=\\s*:)'), - ('ID', '[-a-zA-Z_][-a-zA-Z0-9_]*'), + ('BAREWORD', '[-a-zA-Z_][-a-zA-Z0-9_]*'), ('BANG_IMPORTANT', '!important'), 
- ('URL', '(?:[\\\\].|[^$\'"()\\x00-\\x08\\x0b\\x0e-\\x1f\\x7f])*'), + ('INTERP_START', '#[{]'), + ('INTERP_END', '[}]'), + ('INTERP_ANYTHING', '([^#]|#(?![{]))*'), + ('BAREURL', '(?:[\\\\].|[^#$\'"()\\x00-\\x08\\x0b\\x0e-\\x1f\\x7f]|#(?![{]))*'), ] def __init__(self, input=None): @@ -166,7 +173,7 @@ class SassExpression(Parser): def expr_lst(self): expr_slst = self.expr_slst() v = [expr_slst] - while self._peek(self.argspec_items_rsts) == '","': + while self._peek(self.expr_lst_rsts) == '","': self._scan('","') expr_slst = self.expr_slst() v.append(expr_slst) @@ -175,7 +182,7 @@ class SassExpression(Parser): def expr_slst(self): or_expr = self.or_expr() v = [or_expr] - while self._peek(self.expr_slst_rsts) not in self.argspec_items_rsts: + while self._peek(self.expr_slst_rsts) not in self.expr_lst_rsts: or_expr = self.or_expr() v.append(or_expr) return ListLiteral(v, comma=False) if len(v) > 1 else v[0] @@ -301,22 +308,9 @@ class SassExpression(Parser): elif _token_ == '"url"': self._scan('"url"') LPAR = self._scan('LPAR') - _token_ = self._peek(self.atom_rsts_) - if _token_ == 'URL': - URL = self._scan('URL') - quotes = None - elif _token_ == '"\\""': - self._scan('"\\""') - URL = self._scan('URL') - self._scan('"\\""') - quotes = '"' - else: # == '"\'"' - self._scan('"\'"') - URL = self._scan('URL') - self._scan('"\'"') - quotes = "'" + interpolated_url = self.interpolated_url() RPAR = self._scan('RPAR') - return Literal(Url(URL, quotes=quotes)) + print("url!"); return interpolated_url elif _token_ == 'FNCT': FNCT = self._scan('FNCT') LPAR = self._scan('LPAR') @@ -326,21 +320,18 @@ class SassExpression(Parser): elif _token_ == 'BANG_IMPORTANT': BANG_IMPORTANT = self._scan('BANG_IMPORTANT') return Literal(String(BANG_IMPORTANT, quotes=None)) - elif _token_ == 'ID': - ID = self._scan('ID') - return Literal.from_bareword(ID) + elif _token_ == 'BAREWORD': + interpolated_bareword = self.interpolated_bareword() + return Interpolation.maybe(interpolated_bareword) elif 
_token_ == 'NUM': NUM = self._scan('NUM') UNITS = None - if self._peek(self.atom_rsts__) == 'UNITS': + if self._peek(self.atom_rsts_) == 'UNITS': UNITS = self._scan('UNITS') return Literal(Number(float(NUM), unit=UNITS)) - elif _token_ == 'STR': - STR = self._scan('STR') - return Literal(String(dequote(STR), quotes="'")) - elif _token_ == 'QSTR': - QSTR = self._scan('QSTR') - return Literal(String(dequote(QSTR), quotes='"')) + elif _token_ not in self.atom_chks: + interpolated_string = self.interpolated_string() + return interpolated_string elif _token_ == 'COLOR': COLOR = self._scan('COLOR') return Literal(Color.from_hex(COLOR, literal=True)) @@ -374,32 +365,122 @@ class SassExpression(Parser): KWVAR = self._scan('KWVAR') return Variable(KWVAR) - u_expr_chks = set(['"url"', 'LPAR', 'COLOR', 'QSTR', 'NUM', 'FNCT', 'STR', 'VAR', 'BANG_IMPORTANT', 'ID']) - m_expr_rsts = set(['LPAR', 'SUB', 'QSTR', 'RPAR', 'MUL', 'DIV', 'BANG_IMPORTANT', 'LE', 'COLOR', 'NE', 'LT', 'NUM', '"url"', 'GT', 'END', 'SIGN', 'GE', 'FNCT', 'STR', 'VAR', 'EQ', 'ID', 'AND', 'ADD', 'NOT', 'OR', '","']) + def interpolation(self): + INTERP_START = self._scan('INTERP_START') + expr_lst = self.expr_lst() + INTERP_END = self._scan('INTERP_END') + return expr_lst + + def interpolated_url(self): + _token_ = self._peek(self.interpolated_url_rsts) + if _token_ == 'BAREURL': + interpolated_bare_url = self.interpolated_bare_url() + return Interpolation.maybe(interpolated_bare_url, type=Url, quotes=None) + elif _token_ == 'SINGLE_QUOTE': + interpolated_string_single = self.interpolated_string_single() + return Interpolation.maybe(interpolated_string_single, type=Url, quotes="'") + else: # == 'DOUBLE_QUOTE' + interpolated_string_double = self.interpolated_string_double() + return Interpolation.maybe(interpolated_string_double, type=Url, quotes='"') + + def interpolated_bare_url(self): + BAREURL = self._scan('BAREURL') + parts = [unescape(BAREURL)] + while self._peek(self.interpolated_bare_url_rsts) == 
'INTERP_START': + interpolation = self.interpolation() + parts.append(interpolation) + BAREURL = self._scan('BAREURL') + parts.append(unescape(BAREURL)) + return parts + + def interpolated_string(self): + _token_ = self._peek(self.interpolated_string_rsts) + if _token_ == 'SINGLE_QUOTE': + interpolated_string_single = self.interpolated_string_single() + return Interpolation.maybe(interpolated_string_single, quotes="'") + else: # == 'DOUBLE_QUOTE' + interpolated_string_double = self.interpolated_string_double() + return Interpolation.maybe(interpolated_string_double, quotes='"') + + def interpolated_string_single(self): + SINGLE_QUOTE = self._scan('SINGLE_QUOTE') + SINGLE_STRING_GUTS = self._scan('SINGLE_STRING_GUTS') + parts = [unescape(SINGLE_STRING_GUTS)] + while self._peek(self.interpolated_string_single_rsts) == 'INTERP_START': + interpolation = self.interpolation() + parts.append(interpolation) + SINGLE_STRING_GUTS = self._scan('SINGLE_STRING_GUTS') + parts.append(unescape(SINGLE_STRING_GUTS)) + SINGLE_QUOTE = self._scan('SINGLE_QUOTE') + return parts + + def interpolated_string_double(self): + DOUBLE_QUOTE = self._scan('DOUBLE_QUOTE') + DOUBLE_STRING_GUTS = self._scan('DOUBLE_STRING_GUTS') + parts = [unescape(DOUBLE_STRING_GUTS)] + while self._peek(self.interpolated_string_double_rsts) == 'INTERP_START': + interpolation = self.interpolation() + parts.append(interpolation) + DOUBLE_STRING_GUTS = self._scan('DOUBLE_STRING_GUTS') + parts.append(unescape(DOUBLE_STRING_GUTS)) + DOUBLE_QUOTE = self._scan('DOUBLE_QUOTE') + return parts + + def interpolated_bareword(self): + BAREWORD = self._scan('BAREWORD') + parts = [unescape(BAREWORD)] + while self._peek(self.interpolated_bareword_rsts) == 'INTERP_START': + interpolation = self.interpolation() + parts.append(interpolation) + BAREWORD = self._scan('BAREWORD') + parts.append(unescape(BAREWORD)) + return parts + + def goal_interpolated_anything(self): + INTERP_ANYTHING = self._scan('INTERP_ANYTHING') + parts = 
[INTERP_ANYTHING] + while self._peek(self.goal_interpolated_anything_rsts) == 'INTERP_START': + interpolation = self.interpolation() + parts.append(interpolation) + INTERP_ANYTHING = self._scan('INTERP_ANYTHING') + parts.append(INTERP_ANYTHING) + END = self._scan('END') + return Interpolation.maybe(parts) + + u_expr_chks = set(['"url"', 'LPAR', 'DOUBLE_QUOTE', 'COLOR', 'BAREWORD', 'NUM', 'FNCT', 'VAR', 'BANG_IMPORTANT', 'SINGLE_QUOTE']) + m_expr_rsts = set(['LPAR', 'DOUBLE_QUOTE', 'SUB', 'RPAR', 'MUL', 'INTERP_END', 'BANG_IMPORTANT', 'DIV', 'LE', 'COLOR', 'NE', 'LT', 'NUM', 'BAREWORD', '"url"', 'GT', 'END', 'SIGN', 'GE', 'FNCT', 'VAR', 'EQ', 'AND', 'ADD', 'SINGLE_QUOTE', 'NOT', 'OR', '","']) argspec_items_rsts = set(['RPAR', 'END', '","']) expr_map_rsts = set(['RPAR', '","']) - argspec_items_rsts__ = set(['KWVAR', 'LPAR', 'QSTR', 'SLURPYVAR', 'COLOR', 'DOTDOTDOT', 'SIGN', 'VAR', 'ADD', 'NUM', '"url"', 'FNCT', 'STR', 'NOT', 'BANG_IMPORTANT', 'ID']) + argspec_items_rsts__ = set(['KWVAR', 'LPAR', 'DOUBLE_QUOTE', 'BAREWORD', 'SLURPYVAR', 'COLOR', 'DOTDOTDOT', 'SIGN', 'VAR', 'ADD', 'NUM', '"url"', 'FNCT', 'NOT', 'BANG_IMPORTANT', 'SINGLE_QUOTE']) kwatom_rsts = set(['KWVAR', 'KWID', 'KWSTR', 'KWQSTR', 'KWCOLOR', '":"', 'KWNUM']) - argspec_item_chks = set(['"url"', 'LPAR', 'COLOR', 'QSTR', 'SIGN', 'VAR', 'ADD', 'NUM', 'FNCT', 'STR', 'NOT', 'BANG_IMPORTANT', 'ID']) + argspec_item_chks = set(['"url"', 'LPAR', 'DOUBLE_QUOTE', 'BAREWORD', 'COLOR', 'SIGN', 'VAR', 'ADD', 'NUM', 'FNCT', 'NOT', 'BANG_IMPORTANT', 'SINGLE_QUOTE']) a_expr_chks = set(['ADD', 'SUB']) - expr_slst_rsts = set(['"url"', 'LPAR', 'END', 'COLOR', 'QSTR', 'SIGN', 'VAR', 'ADD', 'NUM', 'RPAR', 'FNCT', 'STR', 'NOT', 'BANG_IMPORTANT', 'ID', '","']) - atom_rsts__ = set(['LPAR', 'SUB', 'QSTR', 'RPAR', 'VAR', 'MUL', 'DIV', 'BANG_IMPORTANT', 'LE', 'COLOR', 'NE', 'LT', 'NUM', '"url"', 'GT', 'END', 'SIGN', 'GE', 'FNCT', 'STR', 'UNITS', 'EQ', 'ID', 'AND', 'ADD', 'NOT', 'OR', '","']) - or_expr_rsts = set(['"url"', 
'LPAR', 'END', 'COLOR', 'QSTR', 'SIGN', 'VAR', 'ADD', 'NUM', 'RPAR', 'FNCT', 'STR', 'NOT', 'ID', 'BANG_IMPORTANT', 'OR', '","']) - and_expr_rsts = set(['AND', 'LPAR', 'RPAR', 'END', 'COLOR', 'QSTR', 'SIGN', 'VAR', 'ADD', 'NUM', '"url"', 'FNCT', 'STR', 'NOT', 'ID', 'BANG_IMPORTANT', 'OR', '","']) - comparison_rsts = set(['LPAR', 'QSTR', 'RPAR', 'BANG_IMPORTANT', 'LE', 'COLOR', 'NE', 'LT', 'NUM', '"url"', 'GT', 'END', 'SIGN', 'ADD', 'FNCT', 'STR', 'VAR', 'EQ', 'ID', 'AND', 'GE', 'NOT', 'OR', '","']) + expr_slst_rsts = set(['"url"', 'LPAR', 'DOUBLE_QUOTE', 'BAREWORD', 'END', 'COLOR', 'SIGN', 'VAR', 'ADD', 'NUM', 'RPAR', 'FNCT', 'NOT', 'INTERP_END', 'BANG_IMPORTANT', 'SINGLE_QUOTE', '","']) + interpolated_bareword_rsts = set(['LPAR', 'DOUBLE_QUOTE', 'SUB', 'RPAR', 'MUL', 'INTERP_END', 'BANG_IMPORTANT', 'DIV', 'LE', 'INTERP_START', 'COLOR', 'NE', 'LT', 'NUM', 'BAREWORD', '"url"', 'GT', 'END', 'SIGN', 'GE', 'FNCT', 'VAR', 'EQ', 'AND', 'ADD', 'SINGLE_QUOTE', 'NOT', 'OR', '","']) + or_expr_rsts = set(['"url"', 'LPAR', 'DOUBLE_QUOTE', 'BAREWORD', 'END', 'SINGLE_QUOTE', 'COLOR', 'SIGN', 'VAR', 'ADD', 'NUM', 'RPAR', 'FNCT', 'NOT', 'INTERP_END', 'BANG_IMPORTANT', 'OR', '","']) + interpolated_url_rsts = set(['DOUBLE_QUOTE', 'BAREURL', 'SINGLE_QUOTE']) + interpolated_string_single_rsts = set(['SINGLE_QUOTE', 'INTERP_START']) + and_expr_rsts = set(['AND', 'LPAR', 'DOUBLE_QUOTE', 'BAREWORD', 'END', 'SINGLE_QUOTE', 'COLOR', 'RPAR', 'SIGN', 'VAR', 'ADD', 'NUM', '"url"', 'FNCT', 'NOT', 'INTERP_END', 'BANG_IMPORTANT', 'OR', '","']) + comparison_rsts = set(['LPAR', 'DOUBLE_QUOTE', 'RPAR', 'INTERP_END', 'BANG_IMPORTANT', 'LE', 'COLOR', 'NE', 'LT', 'NUM', 'BAREWORD', '"url"', 'GT', 'END', 'SIGN', 'ADD', 'FNCT', 'VAR', 'EQ', 'AND', 'GE', 'SINGLE_QUOTE', 'NOT', 'OR', '","']) argspec_chks = set(['DOTDOTDOT', 'SLURPYVAR']) - atom_rsts_ = set(['URL', '"\\""', '"\'"']) + atom_rsts_ = set(['LPAR', 'DOUBLE_QUOTE', 'SUB', 'RPAR', 'VAR', 'MUL', 'INTERP_END', 'BANG_IMPORTANT', 'DIV', 'LE', 'COLOR', 
'NE', 'LT', 'NUM', 'BAREWORD', '"url"', 'GT', 'END', 'SIGN', 'GE', 'FNCT', 'UNITS', 'EQ', 'AND', 'ADD', 'SINGLE_QUOTE', 'NOT', 'OR', '","']) + interpolated_string_double_rsts = set(['DOUBLE_QUOTE', 'INTERP_START']) expr_map_rsts_ = set(['KWVAR', 'KWID', 'KWSTR', 'KWQSTR', 'RPAR', 'KWCOLOR', '":"', 'KWNUM', '","']) - u_expr_rsts = set(['"url"', 'LPAR', 'COLOR', 'QSTR', 'SIGN', 'ADD', 'NUM', 'FNCT', 'STR', 'VAR', 'BANG_IMPORTANT', 'ID']) + u_expr_rsts = set(['"url"', 'LPAR', 'DOUBLE_QUOTE', 'COLOR', 'SIGN', 'BAREWORD', 'ADD', 'NUM', 'FNCT', 'VAR', 'BANG_IMPORTANT', 'SINGLE_QUOTE']) + atom_chks = set(['COLOR', 'VAR']) comparison_chks = set(['GT', 'GE', 'NE', 'LT', 'LE', 'EQ']) - argspec_items_rsts_ = set(['KWVAR', 'LPAR', 'RPAR', 'QSTR', 'END', 'SLURPYVAR', 'COLOR', 'DOTDOTDOT', 'SIGN', 'VAR', 'ADD', 'NUM', '"url"', 'FNCT', 'STR', 'NOT', 'BANG_IMPORTANT', 'ID']) - a_expr_rsts = set(['LPAR', 'SUB', 'QSTR', 'RPAR', 'BANG_IMPORTANT', 'LE', 'COLOR', 'NE', 'LT', 'NUM', '"url"', 'GT', 'END', 'SIGN', 'GE', 'FNCT', 'STR', 'VAR', 'EQ', 'ID', 'AND', 'ADD', 'NOT', 'OR', '","']) + argspec_items_rsts_ = set(['KWVAR', 'LPAR', 'RPAR', 'BAREWORD', 'END', 'SLURPYVAR', 'COLOR', 'DOTDOTDOT', 'DOUBLE_QUOTE', 'SIGN', 'VAR', 'ADD', 'NUM', '"url"', 'FNCT', 'NOT', 'BANG_IMPORTANT', 'SINGLE_QUOTE']) + a_expr_rsts = set(['LPAR', 'DOUBLE_QUOTE', 'SUB', 'RPAR', 'INTERP_END', 'BANG_IMPORTANT', 'LE', 'COLOR', 'NE', 'LT', 'NUM', 'BAREWORD', '"url"', 'GT', 'END', 'SIGN', 'GE', 'FNCT', 'VAR', 'EQ', 'AND', 'ADD', 'SINGLE_QUOTE', 'NOT', 'OR', '","']) + interpolated_string_rsts = set(['DOUBLE_QUOTE', 'SINGLE_QUOTE']) m_expr_chks = set(['MUL', 'DIV']) kwatom_rsts_ = set(['UNITS', '":"']) - argspec_items_chks = set(['KWVAR', '"url"', 'LPAR', 'COLOR', 'QSTR', 'SIGN', 'VAR', 'ADD', 'NUM', 'FNCT', 'STR', 'NOT', 'BANG_IMPORTANT', 'ID']) - argspec_rsts = set(['KWVAR', 'LPAR', 'BANG_IMPORTANT', 'END', 'SLURPYVAR', 'COLOR', 'DOTDOTDOT', 'RPAR', 'VAR', 'ADD', 'NUM', '"url"', 'FNCT', 'STR', 'NOT', 'QSTR', 'SIGN', 
'ID']) - atom_rsts = set(['KWVAR', 'KWID', 'KWSTR', 'BANG_IMPORTANT', 'LPAR', 'COLOR', 'KWQSTR', 'SIGN', 'RPAR', 'KWCOLOR', 'VAR', 'ADD', 'NUM', '"url"', '":"', 'STR', 'NOT', 'QSTR', 'KWNUM', 'ID', 'FNCT']) + goal_interpolated_anything_rsts = set(['END', 'INTERP_START']) + interpolated_bare_url_rsts = set(['RPAR', 'INTERP_START']) + expr_lst_rsts = set(['INTERP_END', 'RPAR', 'END', '","']) + argspec_items_chks = set(['KWVAR', '"url"', 'DOUBLE_QUOTE', 'BAREWORD', 'LPAR', 'COLOR', 'SIGN', 'VAR', 'ADD', 'NUM', 'FNCT', 'NOT', 'BANG_IMPORTANT', 'SINGLE_QUOTE']) + argspec_rsts = set(['KWVAR', 'LPAR', 'DOUBLE_QUOTE', 'BANG_IMPORTANT', 'END', 'SLURPYVAR', 'COLOR', 'BAREWORD', 'DOTDOTDOT', 'RPAR', 'VAR', 'ADD', 'NUM', '"url"', 'FNCT', 'NOT', 'SIGN', 'SINGLE_QUOTE']) + atom_rsts = set(['KWVAR', 'KWID', 'KWSTR', 'BANG_IMPORTANT', 'LPAR', 'COLOR', 'BAREWORD', 'KWQSTR', 'SIGN', 'DOUBLE_QUOTE', 'RPAR', 'KWCOLOR', 'VAR', 'ADD', 'NUM', '"url"', '":"', 'NOT', 'KWNUM', 'SINGLE_QUOTE', 'FNCT']) argspec_chks_ = set(['END', 'RPAR']) - argspec_rsts_ = set(['KWVAR', 'LPAR', 'BANG_IMPORTANT', 'END', 'COLOR', 'QSTR', 'SIGN', 'VAR', 'ADD', 'NUM', '"url"', 'FNCT', 'STR', 'NOT', 'RPAR', 'ID']) + argspec_rsts_ = set(['KWVAR', 'LPAR', 'DOUBLE_QUOTE', 'BANG_IMPORTANT', 'END', 'COLOR', 'BAREWORD', 'SIGN', 'VAR', 'ADD', 'NUM', '"url"', 'FNCT', 'NOT', 'RPAR', 'SINGLE_QUOTE']) diff --git a/scss/grammar/scanner.py b/scss/grammar/scanner.py index d466254..8e48d2b 100644 --- a/scss/grammar/scanner.py +++ b/scss/grammar/scanner.py @@ -9,13 +9,14 @@ import re DEBUG = False -# TODO copied from __init__ -_blocks_re = re.compile(r'[{},;()\'"\n]') - try: from ._scanner import locate_blocks except ImportError: + # Regex for finding a minimum set of characters that might affect where a + # block starts or ends + _blocks_re = re.compile(r'[{},;()\'"\n]|\\.', re.DOTALL) + def locate_blocks(codestr): """ For processing CSS like strings. 
@@ -41,7 +42,10 @@ except ImportError: if c == '\n': lineno += 1 - if instr is not None: + if c == '\\': + # Escape, also consumes the next character + pass + elif instr is not None: if c == instr: instr = None # A string ends (FIXME: needs to accept escaped characters) elif c in ('"', "'"): @@ -93,11 +97,17 @@ except ImportError: if _selectors: yield lineno, _selectors, _codestr if par: - raise Exception("Missing closing parenthesis somewhere in block: '%s'" % _selectors) + error = "Parentheses never closed" elif instr: - raise Exception("Missing closing string somewhere in block: '%s'" % _selectors) + error = "String literal never terminated" else: - raise Exception("Block never closed: '%s'" % _selectors) + error = "Block never closed" + # TODO should remember the line + position of the actual + # problem, and show it in a SassError + raise SyntaxError( + "Couldn't parse block starting on line {0}: {1}" + .format(lineno, error) + ) losestr = codestr[lose:] for _property in losestr.split(';'): _property = _property.strip() diff --git a/scss/src/block_locator.c b/scss/src/block_locator.c index e081d84..14b3638 100644 --- a/scss/src/block_locator.c +++ b/scss/src/block_locator.c @@ -15,6 +15,7 @@ #include <string.h> #include "block_locator.h" +#define DEBUG int _strip(Py_UNICODE *begin, Py_UNICODE *end, int *lineno, Py_UNICODE **out) { while (begin < end && (*begin == '\n' || *begin == '\t' || *begin == ' ')) begin++; if (out != NULL) { diff --git a/scss/tests/files/general/interpolation-acid.css b/scss/tests/files/general/interpolation-acid.css new file mode 100644 index 0000000..7a72ad2 --- /dev/null +++ b/scss/tests/files/general/interpolation-acid.css @@ -0,0 +1,10 @@ +.foo foo-foo { + onefoothree: foo; + string: hellofoogoodbye; + string-single: "he'llofoogood\"bye"; + string-double: "he\"llofoogood'bye"; + url: url(hellofoogoodbye); + url-single: url("he'llofoogood\"bye"); + url-double: url("he\"llofoogood'bye"); + nested: "type-of(foo) bar"; +} diff --git 
a/scss/tests/files/general/interpolation-acid.scss b/scss/tests/files/general/interpolation-acid.scss new file mode 100644 index 0000000..e5580c6 --- /dev/null +++ b/scss/tests/files/general/interpolation-acid.scss @@ -0,0 +1,15 @@ +$foo: foo; + +.#{$foo} #{$foo}-#{$foo} { + one#{$foo}three: $foo; + + string: hello#{$foo}goodbye; + string-single: 'he\'llo#{$foo}good"bye'; + string-double: "he\"llo#{$foo}good'bye"; + + url: url(hello#{$foo}goodbye); + url-single: url('he\'llo#{$foo}good"bye'); + url-double: url("he\"llo#{$foo}good'bye"); + + nested: "#{"type-of(f#{"o"}o)" "bar"}"; +} diff --git a/scss/types.py b/scss/types.py index 5803b24..90faca1 100644 --- a/scss/types.py +++ b/scss/types.py @@ -6,6 +6,8 @@ from __future__ import unicode_literals from collections import Iterable import colorsys import operator +import re +import string from warnings import warn import six @@ -1006,6 +1008,8 @@ class String(Value): sass_type_name = 'string' + bad_identifier_rx = re.compile('[^-_a-zA-Z\x80-\U0010FFFF]') + def __init__(self, value, quotes='"'): if isinstance(value, String): # TODO unclear if this should be here, but many functions rely on @@ -1041,12 +1045,6 @@ class String(Value): def __hash__(self): return hash(self.value) - def __str__(self): - if self.quotes: - return self.quotes + escape(self.value) + self.quotes - else: - return self.value - def __repr__(self): if self.quotes: quotes = '(' + self.quotes + ')' @@ -1083,15 +1081,76 @@ class String(Value): return String(self.value * int(other.value), quotes=self.quotes) + def _escape_character(self, match): + """Given a single character, return it appropriately CSS-escaped.""" + # TODO is there any case where we'd want to use unicode escaping? 
+ # TODO unsure if this works with newlines + return '\\' + match.group(0) + + def _is_name_start(self, ch): + if ch == '_': + return True + if ord(ch) >= 128: + return True + if ch in string.ascii_letters: + return True + return False + def render(self, compress=False): - return self.__str__() + # TODO should preserve original literals here too -- even the quotes. + # or at least that's what sass does. + # Escape and add quotes as appropriate. + if self.quotes is None: + return self._render_bareword() + else: + return self._render_quoted() + + def _render_bareword(self): + # This is a bareword, so almost anything outside \w needs escaping + ret = self.value + ret = self.bad_identifier_rx.sub(self._escape_character, ret) + + # Also apply some minor quibbling rules about how barewords can + # start: with a "name start", an escape, a hyphen followed by one + # of those, or two hyphens. + if ret[0] == '-': + if ret[1] in '-\\' or self._is_name_start(ret[1]): + pass + else: + # Escape the second character + # TODO what if it's a digit, oops + ret = ret[0] + '\\' + ret[1:] + elif ret[0] == '\\' or self._is_name_start(ret[0]): + pass + else: + # Escape the first character + # TODO what if it's a digit, oops + ret = '\\' + ret + + return ret + + def _render_quoted(self): + # Strictly speaking, the only things we need to quote are the quotes + # themselves, backslashes, and newlines. + # Note: We ignore the original quotes and always use double quotes, to + # match Ruby Sass's behavior. This isn't particularly well-specified, + # though. + quote = '"' + ret = self.value + ret = ret.replace('\\', '\\\\') + ret = ret.replace(quote, '\\' + quote) + # Note that a literal newline is ignored when escaped, so we have to + # use the codepoint instead. But we'll leave the newline as well, to + # aid readability. 
+ ret = ret.replace('\n', '\\a\\\n') + return quote + ret + quote class Url(String): def render(self, compress=False): # TODO url-escape whatever needs escaping # TODO does that mean we should un-url-escape when parsing? probably - return "url({0})".format(super(String, self).render(compress)) + return "url({0})".format(super(Url, self).render(compress)) class Map(Value): diff --git a/scss/util.py b/scss/util.py index 481a0c0..88dd859 100644 --- a/scss/util.py +++ b/scss/util.py @@ -84,6 +84,7 @@ def escape(s): return re.sub(r'''(["'])''', r'\\\1', s) # do not escape '\' +# Deprecated; use the unescape() from cssdefs instead def unescape(s): return re.sub(r'''\\(['"\\])''', r'\1', s) # do unescape '\' |