From 956e2513e7fbba0a1acbf3ce7bb3cf53922ce557 Mon Sep 17 00:00:00 2001 From: "Eevee (Alex Munroe)" Date: Tue, 10 Sep 2013 18:32:42 -0700 Subject: Use Selector objects throughout the compile process. Required a bit of heavy lifting, but the result is shorter, faster, and leaves less cruft in the compiler. --- scss/__init__.py | 132 ++++---- scss/rule.py | 334 +++------------------ scss/selector.py | 327 +++++++++++++++----- scss/tests/files/original-doctests/026-forum-2.css | 4 +- 4 files changed, 350 insertions(+), 447 deletions(-) diff --git a/scss/__init__.py b/scss/__init__.py index 8202d74..9407036 100644 --- a/scss/__init__.py +++ b/scss/__init__.py @@ -480,23 +480,24 @@ class Scss(object): Returns a 2-tuple: a set of selectors, and a set of extended selectors. """ - # Fixe tabs and spaces in selectors + # Fix tabs and spaces in selectors raw_selectors = _spaces_re.sub(' ', raw_selectors) - parents = set() - if ' extends ' in raw_selectors: - selectors = set() - for key in raw_selectors.split(','): - child, _, parent = key.partition(' extends ') - child = child.strip() - parent = parent.strip() - selectors.add(child) - parents.update(s.strip() for s in parent.split('&')) + import re + + from scss.selector import Selector + + parts = re.split(r'\s+extends\s+', raw_selectors, 1) + if len(parts) > 1: + unparsed_selectors, unsplit_parents = parts + # Multiple `extends` are delimited by `&` + unparsed_parents = unsplit_parents.split('&') else: - selectors = set(s.strip() for s in raw_selectors.split(',')) + unparsed_selectors, = parts + unparsed_parents = () - selectors.discard('') - parents.discard('') + selectors = Selector.parse_many(unparsed_selectors) + parents = [Selector.parse_one(parent) for parent in unparsed_parents] return selectors, parents @@ -564,9 +565,12 @@ class Scss(object): elif code == '@import': self._do_import(rule, p_children, scope, block) elif code == '@extend': + from scss.selector import Selector selectors = calculator.apply_vars(block.argument) - rule.extends_selectors.update(p.strip() for p in selectors.replace(',', '&').split('&')) - rule.extends_selectors.discard('') + # XXX this no longer handles `&`, which is from xcss + rule.extends_selectors.extend(Selector.parse_many(selectors)) + #rule.extends_selectors.update(p.strip() for p in selectors.replace(',', '&').split('&')) + #rule.extends_selectors.discard('') elif code == '@return': ret = calculator.calculate(block.argument) rule.retval = ret @@ -1217,8 +1221,9 @@ class Scss(object): calculator = Calculator(rule.namespace) block.header.argument = calculator.apply_vars(block.header.argument) - new_ancestry = list(rule.ancestry) - if block.directive == '@media' and rule.ancestry: + # TODO merge into RuleAncestry + new_ancestry = list(rule.ancestry.headers) + if block.directive == '@media' and new_ancestry: for i, header in reversed(list(enumerate(new_ancestry))): if header.is_selector: continue @@ -1235,6 +1240,7 @@ class Scss(object): else: new_ancestry.append(block.header) + from scss.rule import RuleAncestry new_rule = SassRule( source_file=rule.source_file, lineno=block.lineno, @@ -1244,7 +1250,7 @@ class Scss(object): options=rule.options.copy(), #properties #extends_selectors - ancestry=new_ancestry, + ancestry=RuleAncestry(new_ancestry), namespace=rule.namespace.derive(), ) @@ -1260,33 +1266,7 @@ class Scss(object): raw_selectors = calculator.apply_vars(block.prop) c_selectors, c_parents = self.parse_selectors(raw_selectors) - p_selectors = rule.selectors - if not p_selectors: - # If no parents, pretend there's a single dummy parent selector - # so the loop below runs once - # XXX this is grody man and leaks down through all over the place - p_selectors = frozenset(('',)) - - better_selectors = set() - for c_selector in c_selectors: - for p_selector in p_selectors: - if c_selector == 'self': - # xCSS extension: "self" means to hoist to the parent - better_selectors.add(p_selector) - elif '&' in c_selector: # Parent References - better_selectors.add(c_selector.replace('&', p_selector)) - elif p_selector: - better_selectors.add(p_selector + ' ' + c_selector) - else: - better_selectors.add(c_selector) - - # Merge ancestry - from scss.rule import BlockSelectorHeader - selector_header = BlockSelectorHeader(better_selectors) - if rule.ancestry and rule.ancestry[-1].is_selector: - new_ancestry = rule.ancestry[:-1] + [selector_header] - else: - new_ancestry = rule.ancestry + [selector_header] + new_ancestry = rule.ancestry.with_selectors(c_selectors) _rule = SassRule( source_file=rule.source_file, @@ -1315,10 +1295,15 @@ class Scss(object): # TODO: fix the Selector rendering to put the right amount of space in # the right places # TODO: child/sibling/etc selectors aren't handled correctly + # TODO: preserve selector order # TODO: %foo may not be handled correctly # TODO: a whole bunch of unit tests for Selector parsing # TODO: make sure this all works for kronuz # TODO: steal a TONNNNNN of tests from ruby and sassc for this + # TODO: can we skip all this woek if we've never seen an @extend? + + # TODO: does this correctly handle extending a rule with a different + # ancestry? @@ -1327,7 +1312,7 @@ class Scss(object): # First, rig a way to find arbitrary selectors quickly. Most selectors # revolve around elements, classes, and IDs, so parse those out and use # them as a rough key. Ignore order and duplication for now. - from scss.rule import Selector + from scss.selector import Selector key_to_selectors = defaultdict(set) selector_to_rules = defaultdict(list) pos = 0 @@ -1336,8 +1321,7 @@ class Scss(object): pos += 1 for selector in rule.selectors: - selobj, = Selector.parse(selector) - for key in selobj.lookup_key(): + for key in selector.lookup_key(): key_to_selectors[key].add(selector) selector_to_rules[selector].append(rule) @@ -1347,40 +1331,38 @@ class Scss(object): for selector in rule.extends_selectors: extends_selectors = [] - selobj, = Selector.parse(selector) import operator - candidates = reduce(operator.and_, (key_to_selectors[key] for key in selobj.lookup_key())) - for cand in candidates: - extend_selector_obj, = Selector.parse(cand) - if extend_selector_obj.is_superset_of(selobj): - extends_selectors.append(extend_selector_obj) + candidates = reduce(operator.and_, (key_to_selectors[key] for key in selector.lookup_key())) + for candidate in candidates: + if candidate.is_superset_of(selector): + extends_selectors.append(candidate) if not extends_selectors: log.warn("no match found") continue # do magic here - for extend_selector_obj in extends_selectors: - for parent_rule in selector_to_rules[extend_selector_obj.original_selector]: + for extend_selector in extends_selectors: + for parent_rule in selector_to_rules[extend_selector]: rule_selector, = rule.selectors # TODO - new_parents = extend_selector_obj.substitute( - Selector.parse(selector)[0], - Selector.parse(rule_selector)[0], + new_parents = extend_selector.substitute( + selector, + rule_selector, ) existing_parent_selectors = list(parent_rule.selectors) for parent in new_parents: - existing_parent_selectors.append(parent.render()) + existing_parent_selectors.append(parent) parent_rule.selectors = frozenset(existing_parent_selectors) parent_rule.dependent_rules.add(rule.position) # Update indices, in case any later rules try to extend # this one for parent in new_parents: - key_to_selectors[parent.lookup_key()].add(parent.render()) + key_to_selectors[parent].add(parent) # TODO this could lead to duplicates? maybe should # be a set too - selector_to_rules[parent.render()].append(parent_rule) + selector_to_rules[parent].append(parent_rule) @print_timing(3) def manage_order(self): @@ -1429,7 +1411,7 @@ class Scss(object): def _create_css(self, rules, sc=True, sp=' ', tb=' ', nl='\n', debug_info=False): skip_selectors = False - old_ancestry = [] + prev_ancestry_headers = [] textwrap.TextWrapper.wordsep_re = re.compile(r'(?<=,)(\s*)') if hasattr(textwrap.TextWrapper, 'wordsep_simple_re'): @@ -1450,7 +1432,7 @@ class Scss(object): ancestry = rule.ancestry first_mismatch = 0 - for i, (old_header, new_header) in enumerate(zip(old_ancestry, ancestry)): + for i, (old_header, new_header) in enumerate(zip(prev_ancestry_headers, ancestry.headers)): if old_header != new_header: first_mismatch = i break @@ -1459,15 +1441,17 @@ class Scss(object): # trailing semicolon. If the previous block isn't being closed, # that trailing semicolon needs adding in to separate the last # property from the next rule. - if not sc and dangling_property and first_mismatch >= len(old_ancestry): + if not sc and dangling_property and first_mismatch >= len(prev_ancestry_headers): result += ';' # Close blocks and outdent as necessary - for i in range(len(old_ancestry), first_mismatch, -1): + for i in range(len(prev_ancestry_headers), first_mismatch, -1): result += tb * (i - 1) + '}' + nl # Open new blocks as necessary for i in range(first_mismatch, len(ancestry)): + header = ancestry.headers[i] + if debug_info: if not rule.source_file.is_string: filename = rule.source_file.filename @@ -1478,19 +1462,19 @@ class Scss(object): filename = _escape_chars_re.sub(r'\\\1', filename) result += "@media -sass-debug-info{filename{font-family:file\:\/\/%s}line{font-family:\\00003%s}}" % (filename, lineno) + nl - if ancestry[i].is_selector: - header = ancestry[i].render(sep=',' + sp, super_selector=self.super_selector) + if header.is_selector: + header_string = header.render(sep=',' + sp, super_selector=self.super_selector) if nl: - header = nl.join(wrap(header)) + header_string = nl.join(wrap(header_string)) else: - header = ancestry[i].render() - result += tb * i + header + sp + '{' + nl + header_string = header.render() + result += tb * i + header_string + sp + '{' + nl total_rules += 1 - if ancestry[i].is_selector: + if header.is_selector: total_selectors += 1 - old_ancestry = ancestry + prev_ancestry_headers = ancestry.headers dangling_property = False if not skip_selectors: @@ -1498,7 +1482,7 @@ class Scss(object): dangling_property = True # Close all remaining blocks - for i in reversed(range(len(old_ancestry))): + for i in reversed(range(len(prev_ancestry_headers))): result += tb * i + '}' + nl return (result, total_rules, total_selectors) diff --git a/scss/rule.py b/scss/rule.py index 1c669b2..b263234 100644 --- a/scss/rule.py +++ b/scss/rule.py @@ -5,6 +5,7 @@ import six import logging from scss.cssdefs import _has_placeholder_re +from scss.selector import Selector from scss.types import Value @@ -174,7 +175,7 @@ class SassRule(object): self.retval = None if ancestry is None: - self.ancestry = [] + self.ancestry = RuleAncestry() else: self.ancestry = ancestry @@ -187,22 +188,22 @@ class SassRule(object): @property def selectors(self): # TEMPORARY - if self.ancestry and self.ancestry[-1].is_selector: - return frozenset(self.ancestry[-1].selectors) + if self.ancestry.headers and self.ancestry.headers[-1].is_selector: + return self.ancestry.headers[-1].selectors else: - return frozenset() + return () @selectors.setter def selectors(self, value): - for header in reversed(self.ancestry): - if header.is_selector: - header.selectors |= value - return - else: - # TODO media - break + new_header = BlockSelectorHeader(value) + if self.ancestry.headers and self.ancestry.headers[-1].is_selector: + # Replace existing selectors + new_headers = self.ancestry.headers[:-1] + (new_header,) + else: + # We're nested inside something; add new selectors + new_headers = self.ancestry.headers + (new_header,) - self.ancestry.append(BlockSelectorHeader(value)) + self.ancestry = RuleAncestry(new_headers) @property def file_and_line(self): @@ -247,294 +248,37 @@ class SassRule(object): ) -class Selector(object): - """A single CSS selector.""" - - def __init__(self, selector, tree): - """Private; please use parse().""" - self.original_selector = selector - self._tree = tree +class RuleAncestry(object): + def __init__(self, headers=()): + self.headers = tuple(headers) - @classmethod - def parse(cls, selector): - # Super dumb little selector parser - - # Yes, yes, this is a regex tokenizer. The actual meaning of the - # selector doesn't matter; the parts are just important for matching up - # during @extend. - import re - tokenizer = re.compile( - r''' - # Colons introduce pseudo-selectors, sometimes with parens - # TODO doesn't handle quoted ) - [:]+ [-\w]+ (?: [(] .+? [)] )? - - # Square brackets are attribute tests - # TODO: this doesn't handle ] within a string - | [[] .+? []] - - # Dot and pound start class/id selectors. Percent starts a Sass - # extend-target faux selector. - | [.#%] [-\w]+ - - # Plain identifiers, or single asterisks, are element names - | [-\w]+ - | [*] - - # These guys are combinators -- note that a single space counts too - | \s* [ +>~] \s* - - # And as a last-ditch effort for something really outlandish (e.g. - # percentages as faux-selectors in @keyframes), just eat up to the - # next whitespace - | (\S+) - ''', re.VERBOSE | re.MULTILINE) - - # Selectors have three levels: simple, combinator, comma-delimited. - # Each combinator can only appear once as a delimiter between simple - # selectors, so it can be thought of as a prefix. - # So this: - # a.b + c, d#e - # parses into two Selectors with these structures: - # [[' ', 'a', '.b'], ['+', 'c']] - # [[' ', 'd', '#e']] - # Note that the first simple selector has an implied descendant - # combinator -- i.e., it is a descendant of the root element. - trees = [[[' ']]] - pos = 0 - while pos < len(selector): - # TODO i don't think this deals with " + " correctly. anywhere. - m = tokenizer.match(selector, pos) - if not m: - # TODO prettify me - raise SyntaxError("Couldn't parse selector: %r" % (selector,)) - - token = m.group(0) - if token == ',': - trees.append([[' ']]) - elif token in ' +>~': - trees[-1].append([token]) - else: - trees[-1][-1].append(token) + def __len__(self): + return len(self.headers) - pos += len(token) + def with_selectors(self, c_selectors): + if self.headers and self.headers[-1].is_selector: + # Need to merge with parent selectors + p_selectors = self.headers[-1].selectors - # TODO this passes the whole selector, not just the part - return [cls(selector, tree) for tree in trees] - - def __repr__(self): - return "<%s: %r>" % (type(self).__name__, self._tree) - - def lookup_key(self): - """Build a key from the "important" parts of a selector: elements, - classes, ids. - """ - # TODO how does this work with multiple selectors - parts = set() - for node in self._tree: - for token in node[1:]: - if token[0] not in ':[': - parts.add(token) - - if not parts: - # Should always have at least ONE key; selectors with no elements, - # no classes, and no ids can be indexed as None to avoid a scan of - # every selector in the entire document - parts.add(None) - - return frozenset(parts) - - def is_superset_of(self, other): - assert isinstance(other, Selector) - - idx = 0 - for other_node in other._tree: - if idx >= len(self._tree): - return False - - while idx < len(self._tree): - node = self._tree[idx] - idx += 1 - - if node[0] == other_node[0] and set(node[1:]) <= set(other_node[1:]): - break - - return True + new_selectors = [] + for p_selector in p_selectors: + for c_selector in c_selectors: + new_selectors.append(c_selector.with_parent(p_selector)) - def substitute(self, target, replacement): - """Return a list of selectors obtained by replacing the `target` - selector with `replacement`. + # Replace the last header with the new merged selectors + new_headers = self.headers[:-1] + (BlockSelectorHeader(new_selectors),) + return RuleAncestry(new_headers) - Herein lie the guts of the Sass @extend directive. - - In general, for a selector ``a X b Y c``, a target ``X Y``, and a - replacement ``q Z``, return the selectors ``a q X b Z c`` and ``q a X b - Z c``. Note in particular that no more than two selectors will be - returned, and the permutation of ancestors will never insert new simple - selectors "inside" the target selector. - """ - - # Find the hinge in the parent selector, and split it into before/after - p_before, p_extras, p_after = self.break_around(target._tree) - - # The replacement has no hinge; it only has the most specific simple - # selector (which is the part that replaces "self" in the parent) and - # whatever preceding simple selectors there may be - r_trail = replacement._tree[:-1] - r_extras = replacement._tree[-1] - - # TODO is this the right order? - # TODO what if the prefix doesn't match? who wins? should we even get - # this far? - focal_node = [p_extras[0]] - focal_node.extend(sorted( - p_extras[1:] + r_extras[1:], - key=lambda token: {'#':1,'.':2,':':3}.get(token[0], 0))) - - befores = self._merge_trails(p_before, r_trail) - - return [Selector(None, before + focal_node + p_after) for before in befores] - - def break_around(self, hinge): - """Given a simple selector node contained within this one (a "hinge"), - break it in half and return a parent selector, extra specifiers for the - hinge, and a child selector. - - That is, given a hinge X, break the selector A + X.y B into A, + .y, - and B. - """ - hinge_start = hinge[0] - for i, node in enumerate(self._tree): - # TODO does first combinator have to match? maybe only if the - # hinge has a non-descendant combinator? - if set(hinge_start[1:]) <= set(node[1:]): - start_idx = i - break - else: - raise ValueError("Couldn't find hinge %r in compound selector %r", (hinge_start, self._tree)) - - for i, hinge_node in enumerate(hinge): - self_node = self._tree[start_idx + i] - if hinge_node[0] == self_node[0] and set(hinge_node[1:]) <= set(self_node[1:]): - continue - - # TODO this isn't true; consider finding `a b` in `a c a b` - raise TypeError("no match") - - end_idx = start_idx + len(hinge) - 1 - focal_node = self._tree[end_idx] - extras = [focal_node[0]] + [token for token in focal_node[1:] if token not in hinge[-1]] - return self._tree[:start_idx], extras, self._tree[end_idx + 1:] - - @staticmethod - def _merge_trails(left, right): - # XXX docs docs docs - - if not left or not right: - # At least one is empty, so there are no conflicts; just - # return whichever isn't empty - return [left or right] - - sequencer = LeastCommonSubsequencer(left, right, eq=_merge_selector_nodes) - lcs = sequencer.find() - - ret = [[]] - left_last = 0 - right_last = 0 - for left_next, right_next, merged in lcs: - left_prefix = left[left_last:left_next] - right_prefix = right[right_last:right_next] - - new_ret = [ - node + left_prefix + right_prefix + [merged] - for node in ret] - if left_prefix and right_prefix: - new_ret.extend( - node + right_prefix + left_prefix + [merged] - for node in ret) - ret = new_ret - - left_last = left_next + 1 - right_last = right_next + 1 - - left_prefix = left[left_last:] - right_prefix = right[right_last:] - # TODO factor this out - new_ret = [ - node + left_prefix + right_prefix - for node in ret] - if left_prefix and right_prefix: - new_ret.extend( - node + right_prefix + left_prefix - for node in ret) - ret = new_ret - - return ret - - def render(self): - return ''.join(''.join(node) for node in self._tree).lstrip() - - -def _merge_selector_nodes(a, b): - # TODO document, turn me into a method on something - # TODO what about combinators - aset = frozenset(a[1:]) - bset = frozenset(b[1:]) - if aset <= bset: - return a + [token for token in b[1:] if token not in aset] - elif bset <= aset: - return b + [token for token in a[1:] if token not in bset] - else: - return None - - - -class LeastCommonSubsequencer(object): - # http://en.wikipedia.org/wiki/Longest_common_subsequence_problem#Code_for_the_dynamic_programming_solution - def __init__(self, a, b, eq=lambda a, b: a if a == b else None): - self.a = a - self.b = b - self.eq_matrix = dict() - self.length_matrix = dict() - - self.init_eq_matrix(eq) - self.init_length_matrix() - - def init_eq_matrix(self, eq): - for ai, aval in enumerate(self.a): - for bi, bval in enumerate(self.b): - self.eq_matrix[ai, bi] = eq(aval, bval) - - def init_length_matrix(self): - for ai in range(-1, len(self.a)): - for bi in range(-1, len(self.b)): - if ai == -1 or bi == -1: - l = 0 - elif self.eq_matrix[ai, bi]: - l = self.length_matrix[ai - 1, bi - 1] + 1 - else: - l = max( - self.length_matrix[ai, bi - 1], - self.length_matrix[ai - 1, bi]) - - self.length_matrix[ai, bi] = l - - def backtrack(self, ai, bi): - if ai < 0 or bi < 0: - # Base case: backtracked beyond the beginning with no match - return [] - - merged = self.eq_matrix[ai, bi] - if merged is not None: - return self.backtrack(ai - 1, bi - 1) + [(ai, bi, merged)] - - if self.length_matrix[ai, bi - 1] > self.length_matrix[ai - 1, bi]: - return self.backtrack(ai, bi - 1) else: - return self.backtrack(ai - 1, bi) + # Whoops, no parent selectors. Just need to double-check that + # there are no uses of `&`. + for c_selector in c_selectors: + if c_selector.has_parent_reference: + raise ValueError("Can't use parent selector '&' in top-level rules") - def find(self): - return self.backtrack(len(self.a) - 1, len(self.b) - 1) + # Add the children as a new header + new_headers = self.headers + (BlockSelectorHeader(c_selectors),) + return RuleAncestry(new_headers) class BlockHeader(object): @@ -618,9 +362,9 @@ class BlockSelectorHeader(BlockHeader): def render(self, sep=', ', super_selector=''): return sep.join(sorted( - super_selector + s + super_selector + s.render() for s in self.selectors - if not _has_placeholder_re.search(s))) + if not s.has_placeholder)) class BlockScopeHeader(BlockHeader): diff --git a/scss/selector.py b/scss/selector.py index 086d511..a235f88 100644 --- a/scss/selector.py +++ b/scss/selector.py @@ -1,80 +1,262 @@ +import re + +# Super dumb little selector parser. + +# Yes, yes, this is a regex tokenizer. The actual meaning of the +# selector doesn't matter; the parts are just important for matching up +# during @extend. + +# Selectors have three levels: simple, combinator, comma-delimited. +# Each combinator can only appear once as a delimiter between simple +# selectors, so it can be thought of as a prefix. +# So this: +# a.b + c, d#e +# parses into two Selectors with these structures: +# [[' ', 'a', '.b'], ['+', 'c']] +# [[' ', 'd', '#e']] +# Note that the first simple selector has an implied descendant +# combinator -- i.e., it is a descendant of the root element. +# TODO `*html` is incorrectly parsed as a single selector +# TODO this oughta be touched up for css4 selectors +SELECTOR_TOKENIZER = re.compile( +r''' + # Colons introduce pseudo-selectors, sometimes with parens + # TODO doesn't handle quoted ) + [:]+ [-\w]+ (?: [(] .+? [)] )? + + # These guys are combinators -- note that a single space counts too + | \s* [ +>~] \s* + + # Square brackets are attribute tests + # TODO: this doesn't handle ] within a string + | [[] .+? []] + + # Dot and pound start class/id selectors. Percent starts a Sass + # extend-target faux selector. + | [.#%] [-\w]+ + + # Percentages are used for @keyframes + | \d+ [%] + + # Plain identifiers, or single asterisks, are element names + | [-\w]+ + | [*] + + # & is the sass replacement token + | [&] + + # And as a last-ditch effort, just eat up to whitespace + | (\S+) +''', re.VERBOSE | re.MULTILINE) + + +# Maps the first character of a token to a rough ordering. The default +# (element names) is zero. +TOKEN_TYPE_ORDER = { + '#': 2, + '.': 3, + ':': 4, + '[': 5, + '%': 6, +} +TOKEN_SORT_KEY = lambda token: TOKEN_TYPE_ORDER.get(token[0], 0) + + +class SimpleSelector(object): + def __init__(self, combinator, tokens): + self.combinator = combinator + # TODO enforce that only one element name (including *) appears in a + # selector + # TODO remove duplicates + self.tokens = tuple(sorted(tokens, key=TOKEN_SORT_KEY)) + + def __repr__(self): + return "<%s: %r>" % (type(self).__name__, self.render()) + + def __hash__(self): + return hash((self.combinator, self.tokens)) + + def __eq__(self, other): + if not isinstance(other, SimpleSelector): + return NotImplemented + + return self.combinator == other.combinator and self.tokens == other.tokens + + @property + def has_parent_reference(self): + return '&' in self.tokens or 'self' in self.tokens + + @property + def has_placeholder(self): + return any( + token[0] == '%' + for token in self.tokens) + + def is_superset_of(self, other): + return ( + self.combinator == other.combinator and + set(self.tokens) <= set(other.tokens)) + + def replace_parent(self, parent_simples): + assert parent_simples + + ancestors = parent_simples[:-1] + parent = parent_simples[-1] + + did_replace = False + new_tokens = [] + for token in self.tokens: + if not did_replace and token in ('&', 'self'): + did_replace = True + new_tokens.extend(parent.tokens) + else: + new_tokens.append(token) + + if did_replace: + # This simple selector was merged into the direct parent + merged_simple = type(self)(self.combinator, new_tokens) + return ancestors + (merged_simple,) + else: + # This simple selector is completely separate + return parent_simples + (self,) + + # TODO just use set ops for these, once the constructor removes dupes + def merge_with(self, other): + new_tokens = self.tokens + tuple(token for token in other.tokens if token not in set(self.tokens)) + return type(self)(self.combinator, new_tokens) + + def difference(self, other): + new_tokens = tuple(token for token in self.tokens if token not in set(other.tokens)) + return type(self)(self.combinator, new_tokens) + + def render(self): + # TODO fail if there are no tokens, or if one is a placeholder? + rendered = ''.join(self.tokens) + if self.combinator != ' ': + rendered = ' '.join((self.combinator, rendered)) + + return rendered + + class Selector(object): """A single CSS selector.""" def __init__(self, selector, tree): """Private; please use parse().""" self.original_selector = selector - self._tree = tree + # TODO rename this + # TODO enforce uniqueness + self._tree = tuple(tree) @classmethod - def parse(cls, selector): - # Super dumb little selector parser - - # Yes, yes, this is a regex tokenizer. The actual meaning of the - # selector doesn't matter; the parts are just important for matching up - # during @extend. - import re - tokenizer = re.compile( - r''' - # Colons introduce pseudo-selectors, sometimes with parens - # TODO doesn't handle quoted ) - [:]+ [-\w]+ (?: [(] .+? [)] )? - - # Square brackets are attribute tests - # TODO: this doesn't handle ] within a string - | [[] .+? []] - - # Dot and pound start class/id selectors. Percent starts a Sass - # extend-target faux selector. - | [.#%] [-\w]+ - - # Plain identifiers, or single asterisks, are element names - | [-\w]+ - | [*] - - # These guys are combinators -- note that a single space counts too - | \s* [ +>~] \s* - - # And as a last-ditch effort for something really outlandish (e.g. - # percentages as faux-selectors in @keyframes), just eat up to the - # next whitespace - | (\S+) - ''', re.VERBOSE | re.MULTILINE) - - # Selectors have three levels: simple, combinator, comma-delimited. - # Each combinator can only appear once as a delimiter between simple - # selectors, so it can be thought of as a prefix. - # So this: - # a.b + c, d#e - # parses into two Selectors with these structures: - # [[' ', 'a', '.b'], ['+', 'c']] - # [[' ', 'd', '#e']] - # Note that the first simple selector has an implied descendant - # combinator -- i.e., it is a descendant of the root element. - trees = [[[' ']]] + def parse_many(cls, selector): + selector = selector.strip() + ret = [] + pending_tree = [] + pending_combinator = ' ' + pending_tokens = [] + pos = 0 while pos < len(selector): # TODO i don't think this deals with " + " correctly. anywhere. - m = tokenizer.match(selector, pos) + m = SELECTOR_TOKENIZER.match(selector, pos) if not m: # TODO prettify me raise SyntaxError("Couldn't parse selector: %r" % (selector,)) token = m.group(0) + pos += len(token) + + # Kill any extraneous space, BUT make sure not to turn a lone space + # into an empty string + token = token.strip() or ' ' + if token == ',': - trees.append([[' ']]) + # End current selector + # TODO what about "+ ,"? what do i even do with that + if pending_tokens: + pending_tree.append( + SimpleSelector(pending_combinator, pending_tokens)) + if pending_tree: + ret.append(cls(selector, pending_tree)) + pending_tree = [] + pending_combinator = ' ' + pending_tokens = [] elif token in ' +>~': - trees[-1].append([token]) + # End current simple selector + if pending_tokens: + pending_tree.append( + SimpleSelector(pending_combinator, pending_tokens)) + pending_combinator = token + pending_tokens = [] else: - trees[-1][-1].append(token) + # Add to pending simple selector + pending_tokens.append(token) - pos += len(token) - # TODO this passes the whole selector, not just the part - return [cls(selector, tree) for tree in trees] + # Deal with any remaining pending bits + # TODO reduce copy-paste yikes + if pending_tokens: + pending_tree.append( + SimpleSelector(pending_combinator, pending_tokens)) + if pending_tree: + ret.append(cls(selector, pending_tree)) + + return ret + + @classmethod + def parse(cls, selector_string): + # TODO remove me + return cls.parse_many(selector_string) + + @classmethod + def parse_one(cls, selector_string): + selectors = cls.parse_many(selector_string) + if len(selectors) != 1: + # TODO better error + raise ValueError + + return selectors[0] def __repr__(self): - return "<%s: %r>" % (type(self).__name__, self._tree) + return "<%s: %r>" % (type(self).__name__, self.render()) + + def __hash__(self): + return hash(self._tree) + + def __eq__(self, other): + if not isinstance(other, Selector): + return NotImplemented + + return self._tree == other._tree + + @property + def has_parent_reference(self): + return any( + simple.has_parent_reference + for simple in self._tree) + + @property + def has_placeholder(self): + return any( + simple.has_placeholder + for simple in self._tree) + + def with_parent(self, parent): + saw_parent_ref = False + + new_tree = [] + for simple in self._tree: + if simple.has_parent_reference: + new_tree.extend(simple.replace_parent(parent._tree)) + saw_parent_ref = True + else: + new_tree.append(simple) + + if not saw_parent_ref: + new_tree = parent._tree + tuple(new_tree) + + return type(self)("", new_tree) def lookup_key(self): """Build a key from the "important" parts of a selector: elements, @@ -83,7 +265,7 @@ class Selector(object): # TODO how does this work with multiple selectors parts = set() for node in self._tree: - for token in node[1:]: + for token in node.tokens: if token[0] not in ':[': parts.add(token) @@ -107,7 +289,7 @@ class Selector(object): node = self._tree[idx] idx += 1 - if node[0] == other_node[0] and set(node[1:]) <= set(other_node[1:]): + if node.is_superset_of(other_node): break return True @@ -134,17 +316,13 @@ class Selector(object): r_trail = replacement._tree[:-1] r_extras = replacement._tree[-1] - # TODO is this the right order? # TODO what if the prefix doesn't match? who wins? should we even get # this far? - focal_node = [p_extras[0]] - focal_node.extend(sorted( - p_extras[1:] + r_extras[1:], - key=lambda token: {'#':1,'.':2,':':3}.get(token[0], 0))) + focal_nodes = (p_extras.merge_with(r_extras),) befores = self._merge_trails(p_before, r_trail) - return [Selector(None, before + focal_node + p_after) for before in befores] + return [Selector(None, before + focal_nodes + p_after) for before in befores] def break_around(self, hinge): """Given a simple selector node contained within this one (a "hinge"), @@ -158,7 +336,7 @@ class Selector(object): for i, node in enumerate(self._tree): # TODO does first combinator have to match? maybe only if the # hinge has a non-descendant combinator? - if set(hinge_start[1:]) <= set(node[1:]): + if hinge_start.is_superset_of(node): start_idx = i break else: @@ -166,7 +344,7 @@ class Selector(object): for i, hinge_node in enumerate(hinge): self_node = self._tree[start_idx + i] - if hinge_node[0] == self_node[0] and set(hinge_node[1:]) <= set(self_node[1:]): + if hinge_node.is_superset_of(self_node): continue # TODO this isn't true; consider finding `a b` in `a c a b` @@ -174,7 +352,7 @@ class Selector(object): end_idx = start_idx + len(hinge) - 1 focal_node = self._tree[end_idx] - extras = [focal_node[0]] + [token for token in focal_node[1:] if token not in hinge[-1]] + extras = focal_node.difference(hinge[-1]) return self._tree[:start_idx], extras, self._tree[end_idx + 1:] @staticmethod @@ -189,7 +367,7 @@ class Selector(object): sequencer = LeastCommonSubsequencer(left, right, eq=_merge_selector_nodes) lcs = sequencer.find() - ret = [[]] + ret = [()] left_last = 0 right_last = 0 for left_next, right_next, merged in lcs: @@ -197,11 +375,11 @@ class Selector(object): right_prefix = right[right_last:right_next] new_ret = [ - node + left_prefix + right_prefix + [merged] + node + left_prefix + right_prefix + (merged,) for node in ret] if left_prefix and right_prefix: new_ret.extend( - node + right_prefix + left_prefix + [merged] + node + right_prefix + left_prefix + (merged,) for node in ret) ret = new_ret @@ -223,23 +401,20 @@ class Selector(object): return ret def render(self): - return ''.join(''.join(node) for node in self._tree).lstrip() + return ' '.join(simple.render() for simple in self._tree).lstrip() def _merge_selector_nodes(a, b): # TODO document, turn me into a method on something # TODO what about combinators - aset = frozenset(a[1:]) - bset = frozenset(b[1:]) - if aset <= bset: - return a + [token for token in b[1:] if token not in aset] - elif bset <= aset: - return b + [token for token in a[1:] if token not in bset] + if a.is_superset_of(b): + return a.merge_with(b) + elif b.is_superset_of(a): + return b.merge_with(a) else: return None - class LeastCommonSubsequencer(object): # http://en.wikipedia.org/wiki/Longest_common_subsequence_problem#Code_for_the_dynamic_programming_solution def __init__(self, a, b, eq=lambda a, b: a if a == b else None): diff --git a/scss/tests/files/original-doctests/026-forum-2.css b/scss/tests/files/original-doctests/026-forum-2.css index 4991b75..3246676 100644 --- a/scss/tests/files/original-doctests/026-forum-2.css +++ b/scss/tests/files/original-doctests/026-forum-2.css @@ -4,9 +4,9 @@ *html .b { color: #000; } -*:first-child+html .a { +*:first-child + html .a { color: #fff; } -*:first-child+html .b { +*:first-child + html .b { color: #000; } -- cgit v1.2.1