Use Selector objects throughout the compile process.

Required a bit of heavy lifting, but the result is shorter, faster, and leaves less cruft in the compiler.
author: Eevee (Alex Munroe) <eevee.git@veekun.com> 2013-09-10 18:32:42 -0700
committer: Eevee (Alex Munroe) <eevee.git@veekun.com> 2013-09-10 18:32:42 -0700
commit: 956e2513e7fbba0a1acbf3ce7bb3cf53922ce557 (patch)
tree: 62089def7e44df6637a646a06e250faa0d61f28d
parent: adc605c7a883ef755c1e22270f5adfd9d09e936c (diff)
download: pyscss-956e2513e7fbba0a1acbf3ce7bb3cf53922ce557.tar.gz
4 files changed, 350 insertions, 447 deletions
diff --git a/scss/__init__.py b/scss/__init__.py
index 8202d74..9407036 100644
--- a/scss/__init__.py
+++ b/scss/__init__.py
@@ -480,23 +480,24 @@ class Scss(object):
 
         Returns a 2-tuple: a set of selectors, and a set of extended selectors.
         """
-        # Fixe tabs and spaces in selectors
+        # Fix tabs and spaces in selectors
         raw_selectors = _spaces_re.sub(' ', raw_selectors)
 
-        parents = set()
-        if ' extends ' in raw_selectors:
-            selectors = set()
-            for key in raw_selectors.split(','):
-                child, _, parent = key.partition(' extends ')
-                child = child.strip()
-                parent = parent.strip()
-                selectors.add(child)
-                parents.update(s.strip() for s in parent.split('&'))
+        import re
+
+        from scss.selector import Selector
+
+        parts = re.split(r'\s+extends\s+', raw_selectors, 1)
+        if len(parts) > 1:
+            unparsed_selectors, unsplit_parents = parts
+            # Multiple `extends` are delimited by `&`
+            unparsed_parents = unsplit_parents.split('&')
         else:
-            selectors = set(s.strip() for s in raw_selectors.split(','))
+            unparsed_selectors, = parts
+            unparsed_parents = ()
 
-        selectors.discard('')
-        parents.discard('')
+        selectors = Selector.parse_many(unparsed_selectors)
+        parents = [Selector.parse_one(parent) for parent in unparsed_parents]
 
         return selectors, parents
 
@@ -564,9 +565,12 @@ class Scss(object):
                 elif code == '@import':
                     self._do_import(rule, p_children, scope, block)
                 elif code == '@extend':
+                    from scss.selector import Selector
                     selectors = calculator.apply_vars(block.argument)
-                    rule.extends_selectors.update(p.strip() for p in selectors.replace(',', '&').split('&'))
-                    rule.extends_selectors.discard('')
+                    # XXX this no longer handles `&`, which is from xCSS
+                    rule.extends_selectors.extend(Selector.parse_many(selectors))
+                    #rule.extends_selectors.update(p.strip() for p in selectors.replace(',', '&').split('&'))
+                    #rule.extends_selectors.discard('')
                 elif code == '@return':
                     ret = calculator.calculate(block.argument)
                     rule.retval = ret
@@ -1217,8 +1221,9 @@ class Scss(object):
         calculator = Calculator(rule.namespace)
         block.header.argument = calculator.apply_vars(block.header.argument)
 
-        new_ancestry = list(rule.ancestry)
-        if block.directive == '@media' and rule.ancestry:
+        # TODO merge into RuleAncestry
+        new_ancestry = list(rule.ancestry.headers)
+        if block.directive == '@media' and new_ancestry:
             for i, header in reversed(list(enumerate(new_ancestry))):
                 if header.is_selector:
                     continue
@@ -1235,6 +1240,7 @@ class Scss(object):
         else:
             new_ancestry.append(block.header)
 
+        from scss.rule import RuleAncestry
         new_rule = SassRule(
             source_file=rule.source_file,
             lineno=block.lineno,
@@ -1244,7 +1250,7 @@ class Scss(object):
             options=rule.options.copy(),
             #properties
             #extends_selectors
-            ancestry=new_ancestry,
+            ancestry=RuleAncestry(new_ancestry),
 
             namespace=rule.namespace.derive(),
         )
@@ -1260,33 +1266,7 @@ class Scss(object):
         raw_selectors = calculator.apply_vars(block.prop)
         c_selectors, c_parents = self.parse_selectors(raw_selectors)
 
-        p_selectors = rule.selectors
-        if not p_selectors:
-            # If no parents, pretend there's a single dummy parent selector
-            # so the loop below runs once
-            # XXX this is grody man and leaks down through all over the place
-            p_selectors = frozenset(('',))
-
-        better_selectors = set()
-        for c_selector in c_selectors:
-            for p_selector in p_selectors:
-                if c_selector == 'self':
-                    # xCSS extension: "self" means to hoist to the parent
-                    better_selectors.add(p_selector)
-                elif '&' in c_selector:  # Parent References
-                    better_selectors.add(c_selector.replace('&', p_selector))
-                elif p_selector:
-                    better_selectors.add(p_selector + ' ' + c_selector)
-                else:
-                    better_selectors.add(c_selector)
-
-        # Merge ancestry
-        from scss.rule import BlockSelectorHeader
-        selector_header = BlockSelectorHeader(better_selectors)
-        if rule.ancestry and rule.ancestry[-1].is_selector:
-            new_ancestry = rule.ancestry[:-1] + [selector_header]
-        else:
-            new_ancestry = rule.ancestry + [selector_header]
+        new_ancestry = rule.ancestry.with_selectors(c_selectors)
 
         _rule = SassRule(
             source_file=rule.source_file,
@@ -1315,10 +1295,15 @@ class Scss(object):
         # TODO: fix the Selector rendering to put the right amount of space in
         # the right places
         # TODO: child/sibling/etc selectors aren't handled correctly
+        # TODO: preserve selector order
         # TODO: %foo may not be handled correctly
         # TODO: a whole bunch of unit tests for Selector parsing
         # TODO: make sure this all works for kronuz
         # TODO: steal a TONNNNNN of tests from ruby and sassc for this
+        # TODO: can we skip all this work if we've never seen an @extend?
+
+        # TODO: does this correctly handle extending a rule with a different
+        # ancestry?
 
 
 
@@ -1327,7 +1312,7 @@ class Scss(object):
         # First, rig a way to find arbitrary selectors quickly.  Most selectors
         # revolve around elements, classes, and IDs, so parse those out and use
         # them as a rough key.  Ignore order and duplication for now.
-        from scss.rule import Selector
+        from scss.selector import Selector
         key_to_selectors = defaultdict(set)
         selector_to_rules = defaultdict(list)
         pos = 0
@@ -1336,8 +1321,7 @@ class Scss(object):
             pos += 1
 
             for selector in rule.selectors:
-                selobj, = Selector.parse(selector)
-                for key in selobj.lookup_key():
+                for key in selector.lookup_key():
                     key_to_selectors[key].add(selector)
                 selector_to_rules[selector].append(rule)
 
@@ -1347,40 +1331,38 @@ class Scss(object):
             for selector in rule.extends_selectors:
                 extends_selectors = []
 
-                selobj, = Selector.parse(selector)
                 import operator
-                candidates = reduce(operator.and_, (key_to_selectors[key] for key in selobj.lookup_key()))
-                for cand in candidates:
-                    extend_selector_obj, = Selector.parse(cand)
-                    if extend_selector_obj.is_superset_of(selobj):
-                        extends_selectors.append(extend_selector_obj)
+                candidates = reduce(operator.and_, (key_to_selectors[key] for key in selector.lookup_key()))
+                for candidate in candidates:
+                    if candidate.is_superset_of(selector):
+                        extends_selectors.append(candidate)
 
                 if not extends_selectors:
                     log.warn("no match found")
                     continue
 
                 # do magic here
-                for extend_selector_obj in extends_selectors:
-                    for parent_rule in selector_to_rules[extend_selector_obj.original_selector]:
+                for extend_selector in extends_selectors:
+                    for parent_rule in selector_to_rules[extend_selector]:
                         rule_selector, = rule.selectors  # TODO
-                        new_parents = extend_selector_obj.substitute(
-                            Selector.parse(selector)[0],
-                            Selector.parse(rule_selector)[0],
+                        new_parents = extend_selector.substitute(
+                            selector,
+                            rule_selector,
                         )
 
                         existing_parent_selectors = list(parent_rule.selectors)
                         for parent in new_parents:
-                            existing_parent_selectors.append(parent.render())
+                            existing_parent_selectors.append(parent)
                         parent_rule.selectors = frozenset(existing_parent_selectors)
                         parent_rule.dependent_rules.add(rule.position)
 
                         # Update indices, in case any later rules try to extend
                         # this one
                         for parent in new_parents:
-                            key_to_selectors[parent.lookup_key()].add(parent.render())
+                            key_to_selectors[parent].add(parent)
                             # TODO this could lead to duplicates?  maybe should
                             # be a set too
-                            selector_to_rules[parent.render()].append(parent_rule)
+                            selector_to_rules[parent].append(parent_rule)
 
     @print_timing(3)
     def manage_order(self):
@@ -1429,7 +1411,7 @@ class Scss(object):
     def _create_css(self, rules, sc=True, sp=' ', tb='  ', nl='\n', debug_info=False):
         skip_selectors = False
 
-        old_ancestry = []
+        prev_ancestry_headers = []
 
         textwrap.TextWrapper.wordsep_re = re.compile(r'(?<=,)(\s*)')
         if hasattr(textwrap.TextWrapper, 'wordsep_simple_re'):
@@ -1450,7 +1432,7 @@ class Scss(object):
             ancestry = rule.ancestry
 
             first_mismatch = 0
-            for i, (old_header, new_header) in enumerate(zip(old_ancestry, ancestry)):
+            for i, (old_header, new_header) in enumerate(zip(prev_ancestry_headers, ancestry.headers)):
                 if old_header != new_header:
                     first_mismatch = i
                     break
@@ -1459,15 +1441,17 @@ class Scss(object):
             # trailing semicolon.  If the previous block isn't being closed,
             # that trailing semicolon needs adding in to separate the last
             # property from the next rule.
-            if not sc and dangling_property and first_mismatch >= len(old_ancestry):
+            if not sc and dangling_property and first_mismatch >= len(prev_ancestry_headers):
                 result += ';'
 
             # Close blocks and outdent as necessary
-            for i in range(len(old_ancestry), first_mismatch, -1):
+            for i in range(len(prev_ancestry_headers), first_mismatch, -1):
                 result += tb * (i - 1) + '}' + nl
 
             # Open new blocks as necessary
             for i in range(first_mismatch, len(ancestry)):
+                header = ancestry.headers[i]
+
                 if debug_info:
                     if not rule.source_file.is_string:
                         filename = rule.source_file.filename
@@ -1478,19 +1462,19 @@ class Scss(object):
                             filename = _escape_chars_re.sub(r'\\\1', filename)
                             result += "@media -sass-debug-info{filename{font-family:file\:\/\/%s}line{font-family:\\00003%s}}" % (filename, lineno) + nl
 
-                if ancestry[i].is_selector:
-                    header = ancestry[i].render(sep=',' + sp, super_selector=self.super_selector)
+                if header.is_selector:
+                    header_string = header.render(sep=',' + sp, super_selector=self.super_selector)
                     if nl:
-                        header = nl.join(wrap(header))
+                        header_string = nl.join(wrap(header_string))
                 else:
-                    header = ancestry[i].render()
-                result += tb * i + header + sp + '{' + nl
+                    header_string = header.render()
+                result += tb * i + header_string + sp + '{' + nl
 
                 total_rules += 1
-                if ancestry[i].is_selector:
+                if header.is_selector:
                     total_selectors += 1
 
-            old_ancestry = ancestry
+            prev_ancestry_headers = ancestry.headers
             dangling_property = False
 
             if not skip_selectors:
@@ -1498,7 +1482,7 @@ class Scss(object):
                 dangling_property = True
 
         # Close all remaining blocks
-        for i in reversed(range(len(old_ancestry))):
+        for i in reversed(range(len(prev_ancestry_headers))):
             result += tb * i + '}' + nl
 
         return (result, total_rules, total_selectors)
diff --git a/scss/rule.py b/scss/rule.py
index 1c669b2..b263234 100644
--- a/scss/rule.py
+++ b/scss/rule.py
@@ -5,6 +5,7 @@ import six
 import logging
 
 from scss.cssdefs import _has_placeholder_re
+from scss.selector import Selector
 from scss.types import Value
 
 
@@ -174,7 +175,7 @@ class SassRule(object):
         self.retval = None
 
         if ancestry is None:
-            self.ancestry = []
+            self.ancestry = RuleAncestry()
         else:
             self.ancestry = ancestry
 
@@ -187,22 +188,22 @@ class SassRule(object):
     @property
     def selectors(self):
         # TEMPORARY
-        if self.ancestry and self.ancestry[-1].is_selector:
-            return frozenset(self.ancestry[-1].selectors)
+        if self.ancestry.headers and self.ancestry.headers[-1].is_selector:
+            return self.ancestry.headers[-1].selectors
         else:
-            return frozenset()
+            return ()
 
     @selectors.setter
     def selectors(self, value):
-        for header in reversed(self.ancestry):
-            if header.is_selector:
-                header.selectors |= value
-                return
-            else:
-                # TODO media
-                break
+        new_header = BlockSelectorHeader(value)
+        if self.ancestry.headers and self.ancestry.headers[-1].is_selector:
+            # Replace existing selectors
+            new_headers = self.ancestry.headers[:-1] + (new_header,)
+        else:
+            # We're nested inside something; add new selectors
+            new_headers = self.ancestry.headers + (new_header,)
 
-        self.ancestry.append(BlockSelectorHeader(value))
+        self.ancestry = RuleAncestry(new_headers)
 
     @property
     def file_and_line(self):
@@ -247,294 +248,37 @@ class SassRule(object):
         )
 
 
-class Selector(object):
-    """A single CSS selector."""
-
-    def __init__(self, selector, tree):
-        """Private; please use parse()."""
-        self.original_selector = selector
-        self._tree = tree
+class RuleAncestry(object):
+    def __init__(self, headers=()):
+        self.headers = tuple(headers)
 
-    @classmethod
-    def parse(cls, selector):
-        # Super dumb little selector parser
-
-        # Yes, yes, this is a regex tokenizer.  The actual meaning of the
-        # selector doesn't matter; the parts are just important for matching up
-        # during @extend.
-        import re
-        tokenizer = re.compile(
-        r'''
-            # Colons introduce pseudo-selectors, sometimes with parens
-            # TODO doesn't handle quoted )
-            [:]+ [-\w]+ (?: [(] .+? [)] )?
-
-            # Square brackets are attribute tests
-            # TODO: this doesn't handle ] within a string
-            | [[] .+? []]
-
-            # Dot and pound start class/id selectors.  Percent starts a Sass
-            # extend-target faux selector.
-            | [.#%] [-\w]+
-
-            # Plain identifiers, or single asterisks, are element names
-            | [-\w]+
-            | [*]
-
-            # These guys are combinators -- note that a single space counts too
-            | \s* [ +>~] \s*
-
-            # And as a last-ditch effort for something really outlandish (e.g.
-            # percentages as faux-selectors in @keyframes), just eat up to the
-            # next whitespace
-            | (\S+)
-        ''', re.VERBOSE | re.MULTILINE)
-
-        # Selectors have three levels: simple, combinator, comma-delimited.
-        # Each combinator can only appear once as a delimiter between simple
-        # selectors, so it can be thought of as a prefix.
-        # So this:
-        #     a.b + c, d#e
-        # parses into two Selectors with these structures:
-        #     [[' ', 'a', '.b'], ['+', 'c']]
-        #     [[' ', 'd', '#e']]
-        # Note that the first simple selector has an implied descendant
-        # combinator -- i.e., it is a descendant of the root element.
-        trees = [[[' ']]]
-        pos = 0
-        while pos < len(selector):
-            # TODO i don't think this deals with " + " correctly.  anywhere.
-            m = tokenizer.match(selector, pos)
-            if not m:
-                # TODO prettify me
-                raise SyntaxError("Couldn't parse selector: %r" % (selector,))
-
-            token = m.group(0)
-            if token == ',':
-                trees.append([[' ']])
-            elif token in ' +>~':
-                trees[-1].append([token])
-            else:
-                trees[-1][-1].append(token)
+    def __len__(self):
+        return len(self.headers)
 
-            pos += len(token)
+    def with_selectors(self, c_selectors):
+        if self.headers and self.headers[-1].is_selector:
+            # Need to merge with parent selectors
+            p_selectors = self.headers[-1].selectors
 
-        # TODO this passes the whole selector, not just the part
-        return [cls(selector, tree) for tree in trees]
-
-    def __repr__(self):
-        return "<%s: %r>" % (type(self).__name__, self._tree)
-
-    def lookup_key(self):
-        """Build a key from the "important" parts of a selector: elements,
-        classes, ids.
-        """
-        # TODO how does this work with multiple selectors
-        parts = set()
-        for node in self._tree:
-            for token in node[1:]:
-                if token[0] not in ':[':
-                    parts.add(token)
-
-        if not parts:
-            # Should always have at least ONE key; selectors with no elements,
-            # no classes, and no ids can be indexed as None to avoid a scan of
-            # every selector in the entire document
-            parts.add(None)
-
-        return frozenset(parts)
-
-    def is_superset_of(self, other):
-        assert isinstance(other, Selector)
-
-        idx = 0
-        for other_node in other._tree:
-            if idx >= len(self._tree):
-                return False
-
-            while idx < len(self._tree):
-                node = self._tree[idx]
-                idx += 1
-
-                if node[0] == other_node[0] and set(node[1:]) <= set(other_node[1:]):
-                    break
-
-        return True
+            new_selectors = []
+            for p_selector in p_selectors:
+                for c_selector in c_selectors:
+                    new_selectors.append(c_selector.with_parent(p_selector))
 
-    def substitute(self, target, replacement):
-        """Return a list of selectors obtained by replacing the `target`
-        selector with `replacement`.
+            # Replace the last header with the new merged selectors
+            new_headers = self.headers[:-1] + (BlockSelectorHeader(new_selectors),)
+            return RuleAncestry(new_headers)
 
-        Herein lie the guts of the Sass @extend directive.
-
-        In general, for a selector ``a X b Y c``, a target ``X Y``, and a
-        replacement ``q Z``, return the selectors ``a q X b Z c`` and ``q a X b
-        Z c``.  Note in particular that no more than two selectors will be
-        returned, and the permutation of ancestors will never insert new simple
-        selectors "inside" the target selector.
-        """
-
-        # Find the hinge in the parent selector, and split it into before/after
-        p_before, p_extras, p_after = self.break_around(target._tree)
-
-        # The replacement has no hinge; it only has the most specific simple
-        # selector (which is the part that replaces "self" in the parent) and
-        # whatever preceding simple selectors there may be
-        r_trail = replacement._tree[:-1]
-        r_extras = replacement._tree[-1]
-
-        # TODO is this the right order?
-        # TODO what if the prefix doesn't match?  who wins?  should we even get
-        # this far?
-        focal_node = [p_extras[0]]
-        focal_node.extend(sorted(
-            p_extras[1:] + r_extras[1:],
-            key=lambda token: {'#':1,'.':2,':':3}.get(token[0], 0)))
-
-        befores = self._merge_trails(p_before, r_trail)
-
-        return [Selector(None, before + focal_node + p_after) for before in befores]
-
-    def break_around(self, hinge):
-        """Given a simple selector node contained within this one (a "hinge"),
-        break it in half and return a parent selector, extra specifiers for the
-        hinge, and a child selector.
-
-        That is, given a hinge X, break the selector A + X.y B into A, + .y,
-        and B.
-        """
-        hinge_start = hinge[0]
-        for i, node in enumerate(self._tree):
-            # TODO does first combinator have to match?  maybe only if the
-            # hinge has a non-descendant combinator?
-            if set(hinge_start[1:]) <= set(node[1:]):
-                start_idx = i
-                break
-        else:
-            raise ValueError("Couldn't find hinge %r in compound selector %r", (hinge_start, self._tree))
-
-        for i, hinge_node in enumerate(hinge):
-            self_node = self._tree[start_idx + i]
-            if hinge_node[0] == self_node[0] and set(hinge_node[1:]) <= set(self_node[1:]):
-                continue
-
-            # TODO this isn't true; consider finding `a b` in `a c a b`
-            raise TypeError("no match")
-
-        end_idx = start_idx + len(hinge) - 1
-        focal_node = self._tree[end_idx]
-        extras = [focal_node[0]] + [token for token in focal_node[1:] if token not in hinge[-1]]
-        return self._tree[:start_idx], extras, self._tree[end_idx + 1:]
-
-    @staticmethod
-    def _merge_trails(left, right):
-        # XXX docs docs docs
-
-        if not left or not right:
-            # At least one is empty, so there are no conflicts; just
-            # return whichever isn't empty
-            return [left or right]
-
-        sequencer = LeastCommonSubsequencer(left, right, eq=_merge_selector_nodes)
-        lcs = sequencer.find()
-
-        ret = [[]]
-        left_last = 0
-        right_last = 0
-        for left_next, right_next, merged in lcs:
-            left_prefix = left[left_last:left_next]
-            right_prefix = right[right_last:right_next]
-
-            new_ret = [
-                node + left_prefix + right_prefix + [merged]
-                for node in ret]
-            if left_prefix and right_prefix:
-                new_ret.extend(
-                    node + right_prefix + left_prefix + [merged]
-                    for node in ret)
-            ret = new_ret
-
-            left_last = left_next + 1
-            right_last = right_next + 1
-
-        left_prefix = left[left_last:]
-        right_prefix = right[right_last:]
-        # TODO factor this out
-        new_ret = [
-            node + left_prefix + right_prefix
-            for node in ret]
-        if left_prefix and right_prefix:
-            new_ret.extend(
-                node + right_prefix + left_prefix
-                for node in ret)
-        ret = new_ret
-
-        return ret
-
-    def render(self):
-        return ''.join(''.join(node) for node in self._tree).lstrip()
-
-
-def _merge_selector_nodes(a, b):
-    # TODO document, turn me into a method on something
-    # TODO what about combinators
-    aset = frozenset(a[1:])
-    bset = frozenset(b[1:])
-    if aset <= bset:
-        return a + [token for token in b[1:] if token not in aset]
-    elif bset <= aset:
-        return b + [token for token in a[1:] if token not in bset]
-    else:
-        return None
-
-
-
-class LeastCommonSubsequencer(object):
-    # http://en.wikipedia.org/wiki/Longest_common_subsequence_problem#Code_for_the_dynamic_programming_solution
-    def __init__(self, a, b, eq=lambda a, b: a if a == b else None):
-        self.a = a
-        self.b = b
-        self.eq_matrix = dict()
-        self.length_matrix = dict()
-
-        self.init_eq_matrix(eq)
-        self.init_length_matrix()
-
-    def init_eq_matrix(self, eq):
-        for ai, aval in enumerate(self.a):
-            for bi, bval in enumerate(self.b):
-                self.eq_matrix[ai, bi] = eq(aval, bval)
-
-    def init_length_matrix(self):
-        for ai in range(-1, len(self.a)):
-            for bi in range(-1, len(self.b)):
-                if ai == -1 or bi == -1:
-                    l = 0
-                elif self.eq_matrix[ai, bi]:
-                    l = self.length_matrix[ai - 1, bi - 1] + 1
-                else:
-                    l = max(
-                        self.length_matrix[ai, bi - 1],
-                        self.length_matrix[ai - 1, bi])
-
-                self.length_matrix[ai, bi] = l
-
-    def backtrack(self, ai, bi):
-        if ai < 0 or bi < 0:
-            # Base case: backtracked beyond the beginning with no match
-            return []
-
-        merged = self.eq_matrix[ai, bi]
-        if merged is not None:
-            return self.backtrack(ai - 1, bi - 1) + [(ai, bi, merged)]
-
-        if self.length_matrix[ai, bi - 1] > self.length_matrix[ai - 1, bi]:
-            return self.backtrack(ai, bi - 1)
         else:
-            return self.backtrack(ai - 1, bi)
+            # Whoops, no parent selectors.  Just need to double-check that
+            # there are no uses of `&`.
+            for c_selector in c_selectors:
+                if c_selector.has_parent_reference:
+                    raise ValueError("Can't use parent selector '&' in top-level rules")
 
-    def find(self):
-        return self.backtrack(len(self.a) - 1, len(self.b) - 1)
+            # Add the children as a new header
+            new_headers = self.headers + (BlockSelectorHeader(c_selectors),)
+            return RuleAncestry(new_headers)
 
 
 class BlockHeader(object):
@@ -618,9 +362,9 @@ class BlockSelectorHeader(BlockHeader):
 
     def render(self, sep=', ', super_selector=''):
         return sep.join(sorted(
-            super_selector + s
+            super_selector + s.render()
             for s in self.selectors
-            if not _has_placeholder_re.search(s)))
+            if not s.has_placeholder))
 
 
 class BlockScopeHeader(BlockHeader):
diff --git a/scss/selector.py b/scss/selector.py
index 086d511..a235f88 100644
--- a/scss/selector.py
+++ b/scss/selector.py
@@ -1,80 +1,262 @@
+import re
+
+# Super dumb little selector parser.
+
+# Yes, yes, this is a regex tokenizer.  The actual meaning of the
+# selector doesn't matter; the parts are just important for matching up
+# during @extend.
+
+# Selectors have three levels: simple, combinator, comma-delimited.
+# Each combinator can only appear once as a delimiter between simple
+# selectors, so it can be thought of as a prefix.
+# So this:
+#     a.b + c, d#e
+# parses into two Selectors, each a tuple of (combinator, tokens) nodes:
+#     (' ', ('a', '.b')), ('+', ('c',))
+#     (' ', ('d', '#e'))
+# Note that the first simple selector has an implied descendant
+# combinator -- i.e., it is a descendant of the root element.
+# TODO `*html` is incorrectly parsed as a single selector
+# TODO this oughta be touched up for css4 selectors
+SELECTOR_TOKENIZER = re.compile(
+r'''
+    # Colons introduce pseudo-selectors, sometimes with parens
+    # TODO doesn't handle quoted )
+    [:]+ [-\w]+ (?: [(] .+? [)] )?
+
+    # These guys are combinators -- note that a single space counts too
+    | \s* [ +>~] \s*
+
+    # Square brackets are attribute tests
+    # TODO: this doesn't handle ] within a string
+    | [[] .+? []]
+
+    # Dot and pound start class/id selectors.  Percent starts a Sass
+    # extend-target faux selector.
+    | [.#%] [-\w]+
+
+    # Percentages are used for @keyframes
+    | \d+ [%]
+
+    # Plain identifiers, or single asterisks, are element names
+    | [-\w]+
+    | [*]
+
+    # & is the sass replacement token
+    | [&]
+
+    # And as a last-ditch effort, just eat up to whitespace
+    | (\S+)
+''', re.VERBOSE | re.MULTILINE)
+
+
+# Maps the first character of a token to a rough ordering.  The default
+# (element names) is zero.
+TOKEN_TYPE_ORDER = {
+    '#': 2,
+    '.': 3,
+    ':': 4,
+    '[': 5,
+    '%': 6,
+}
+TOKEN_SORT_KEY = lambda token: TOKEN_TYPE_ORDER.get(token[0], 0)
+
+
+class SimpleSelector(object):
+    def __init__(self, combinator, tokens):
+        self.combinator = combinator
+        # TODO enforce that only one element name (including *) appears in a
+        # selector
+        # TODO remove duplicates
+        self.tokens = tuple(sorted(tokens, key=TOKEN_SORT_KEY))
+
+    def __repr__(self):
+        return "<%s: %r>" % (type(self).__name__, self.render())
+
+    def __hash__(self):
+        return hash((self.combinator, self.tokens))
+
+    def __eq__(self, other):
+        if not isinstance(other, SimpleSelector):
+            return NotImplemented
+
+        return self.combinator == other.combinator and self.tokens == other.tokens
+
+    @property
+    def has_parent_reference(self):
+        return '&' in self.tokens or 'self' in self.tokens
+
+    @property
+    def has_placeholder(self):
+        return any(
+            token[0] == '%'
+            for token in self.tokens)
+
+    def is_superset_of(self, other):
+        return (
+            self.combinator == other.combinator and
+            set(self.tokens) <= set(other.tokens))
+
+    def replace_parent(self, parent_simples):
+        assert parent_simples
+
+        ancestors = parent_simples[:-1]
+        parent = parent_simples[-1]
+
+        did_replace = False
+        new_tokens = []
+        for token in self.tokens:
+            if not did_replace and token in ('&', 'self'):
+                did_replace = True
+                new_tokens.extend(parent.tokens)
+            else:
+                new_tokens.append(token)
+
+        if did_replace:
+            # This simple selector was merged into the direct parent
+            merged_simple = type(self)(self.combinator, new_tokens)
+            return ancestors + (merged_simple,)
+        else:
+            # This simple selector is completely separate
+            return parent_simples + (self,)
+
+    # TODO just use set ops for these, once the constructor removes dupes
+    def merge_with(self, other):
+        new_tokens = self.tokens + tuple(token for token in other.tokens if token not in set(self.tokens))
+        return type(self)(self.combinator, new_tokens)
+
+    def difference(self, other):
+        new_tokens = tuple(token for token in self.tokens if token not in set(other.tokens))
+        return type(self)(self.combinator, new_tokens)
+
+    def render(self):
+        # TODO fail if there are no tokens, or if one is a placeholder?
+        rendered = ''.join(self.tokens)
+        if self.combinator != ' ':
+            rendered = ' '.join((self.combinator, rendered))
+
+        return rendered
+
+
 class Selector(object):
     """A single CSS selector."""
 
     def __init__(self, selector, tree):
         """Private; please use parse()."""
         self.original_selector = selector
-        self._tree = tree
+        # TODO rename this
+        # TODO enforce uniqueness
+        self._tree = tuple(tree)
 
     @classmethod
-    def parse(cls, selector):
-        # Super dumb little selector parser
-
-        # Yes, yes, this is a regex tokenizer.  The actual meaning of the
-        # selector doesn't matter; the parts are just important for matching up
-        # during @extend.
-        import re
-        tokenizer = re.compile(
-        r'''
-            # Colons introduce pseudo-selectors, sometimes with parens
-            # TODO doesn't handle quoted )
-            [:]+ [-\w]+ (?: [(] .+? [)] )?
-
-            # Square brackets are attribute tests
-            # TODO: this doesn't handle ] within a string
-            | [[] .+? []]
-
-            # Dot and pound start class/id selectors.  Percent starts a Sass
-            # extend-target faux selector.
-            | [.#%] [-\w]+
-
-            # Plain identifiers, or single asterisks, are element names
-            | [-\w]+
-            | [*]
-
-            # These guys are combinators -- note that a single space counts too
-            | \s* [ +>~] \s*
-
-            # And as a last-ditch effort for something really outlandish (e.g.
-            # percentages as faux-selectors in @keyframes), just eat up to the
-            # next whitespace
-            | (\S+)
-        ''', re.VERBOSE | re.MULTILINE)
-
-        # Selectors have three levels: simple, combinator, comma-delimited.
-        # Each combinator can only appear once as a delimiter between simple
-        # selectors, so it can be thought of as a prefix.
-        # So this:
-        #     a.b + c, d#e
-        # parses into two Selectors with these structures:
-        #     [[' ', 'a', '.b'], ['+', 'c']]
-        #     [[' ', 'd', '#e']]
-        # Note that the first simple selector has an implied descendant
-        # combinator -- i.e., it is a descendant of the root element.
-        trees = [[[' ']]]
+    def parse_many(cls, selector):
+        selector = selector.strip()
+        ret = []
+        pending_tree = []
+        pending_combinator = ' '
+        pending_tokens = []
+
         pos = 0
         while pos < len(selector):
             # TODO i don't think this deals with " + " correctly.  anywhere.
-            m = tokenizer.match(selector, pos)
+            m = SELECTOR_TOKENIZER.match(selector, pos)
             if not m:
                 # TODO prettify me
                 raise SyntaxError("Couldn't parse selector: %r" % (selector,))
 
             token = m.group(0)
+            pos += len(token)
+
+            # Kill any extraneous space, BUT make sure not to turn a lone space
+            # into an empty string
+            token = token.strip() or ' '
+
             if token == ',':
-                trees.append([[' ']])
+                # End current selector
+                # TODO what about "+ ,"?  what do i even do with that
+                if pending_tokens:
+                    pending_tree.append(
+                        SimpleSelector(pending_combinator, pending_tokens))
+                if pending_tree:
+                    ret.append(cls(selector, pending_tree))
+                pending_tree = []
+                pending_combinator = ' '
+                pending_tokens = []
             elif token in ' +>~':
-                trees[-1].append([token])
+                # End current simple selector
+                if pending_tokens:
+                    pending_tree.append(
+                        SimpleSelector(pending_combinator, pending_tokens))
+                pending_combinator = token
+                pending_tokens = []
             else:
-                trees[-1][-1].append(token)
+                # Add to pending simple selector
+                pending_tokens.append(token)
 
-            pos += len(token)
 
-        # TODO this passes the whole selector, not just the part
-        return [cls(selector, tree) for tree in trees]
+        # Deal with any remaining pending bits
+        # TODO reduce copy-paste yikes
+        if pending_tokens:
+            pending_tree.append(
+                SimpleSelector(pending_combinator, pending_tokens))
+        if pending_tree:
+            ret.append(cls(selector, pending_tree))
+
+        return ret
+
+    @classmethod
+    def parse(cls, selector_string):
+        # TODO remove me
+        return cls.parse_many(selector_string)
+
+    @classmethod
+    def parse_one(cls, selector_string):
+        selectors = cls.parse_many(selector_string)
+        if len(selectors) != 1:
+            # TODO better error
+            raise ValueError
+
+        return selectors[0]
 
     def __repr__(self):
-        return "<%s: %r>" % (type(self).__name__, self._tree)
+        return "<%s: %r>" % (type(self).__name__, self.render())
+
+    def __hash__(self):
+        return hash(self._tree)
+
+    def __eq__(self, other):
+        if not isinstance(other, Selector):
+            return NotImplemented
+
+        return self._tree == other._tree
+
+    @property
+    def has_parent_reference(self):
+        return any(
+            simple.has_parent_reference
+            for simple in self._tree)
+
+    @property
+    def has_placeholder(self):
+        return any(
+            simple.has_placeholder
+            for simple in self._tree)
+
+    def with_parent(self, parent):
+        saw_parent_ref = False
+
+        new_tree = []
+        for simple in self._tree:
+            if simple.has_parent_reference:
+                new_tree.extend(simple.replace_parent(parent._tree))
+                saw_parent_ref = True
+            else:
+                new_tree.append(simple)
+
+        if not saw_parent_ref:
+            new_tree = parent._tree + tuple(new_tree)
+
+        return type(self)("", new_tree)
 
     def lookup_key(self):
         """Build a key from the "important" parts of a selector: elements,
@@ -83,7 +265,7 @@ class Selector(object):
         # TODO how does this work with multiple selectors
         parts = set()
         for node in self._tree:
-            for token in node[1:]:
+            for token in node.tokens:
                 if token[0] not in ':[':
                     parts.add(token)
 
@@ -107,7 +289,7 @@ class Selector(object):
                 node = self._tree[idx]
                 idx += 1
 
-                if node[0] == other_node[0] and set(node[1:]) <= set(other_node[1:]):
+                if node.is_superset_of(other_node):
                     break
 
         return True
@@ -134,17 +316,13 @@ class Selector(object):
         r_trail = replacement._tree[:-1]
         r_extras = replacement._tree[-1]
 
-        # TODO is this the right order?
         # TODO what if the prefix doesn't match?  who wins?  should we even get
         # this far?
-        focal_node = [p_extras[0]]
-        focal_node.extend(sorted(
-            p_extras[1:] + r_extras[1:],
-            key=lambda token: {'#':1,'.':2,':':3}.get(token[0], 0)))
+        focal_nodes = (p_extras.merge_with(r_extras),)
 
         befores = self._merge_trails(p_before, r_trail)
 
-        return [Selector(None, before + focal_node + p_after) for before in befores]
+        return [Selector(None, before + focal_nodes + p_after) for before in befores]
 
     def break_around(self, hinge):
         """Given a simple selector node contained within this one (a "hinge"),
@@ -158,7 +336,7 @@ class Selector(object):
         for i, node in enumerate(self._tree):
             # TODO does first combinator have to match?  maybe only if the
             # hinge has a non-descendant combinator?
-            if set(hinge_start[1:]) <= set(node[1:]):
+            if hinge_start.is_superset_of(node):
                 start_idx = i
                 break
         else:
@@ -166,7 +344,7 @@ class Selector(object):
 
         for i, hinge_node in enumerate(hinge):
             self_node = self._tree[start_idx + i]
-            if hinge_node[0] == self_node[0] and set(hinge_node[1:]) <= set(self_node[1:]):
+            if hinge_node.is_superset_of(self_node):
                 continue
 
             # TODO this isn't true; consider finding `a b` in `a c a b`
@@ -174,7 +352,7 @@ class Selector(object):
 
         end_idx = start_idx + len(hinge) - 1
         focal_node = self._tree[end_idx]
-        extras = [focal_node[0]] + [token for token in focal_node[1:] if token not in hinge[-1]]
+        extras = focal_node.difference(hinge[-1])
         return self._tree[:start_idx], extras, self._tree[end_idx + 1:]
 
     @staticmethod
@@ -189,7 +367,7 @@ class Selector(object):
         sequencer = LeastCommonSubsequencer(left, right, eq=_merge_selector_nodes)
         lcs = sequencer.find()
 
-        ret = [[]]
+        ret = [()]
         left_last = 0
         right_last = 0
         for left_next, right_next, merged in lcs:
@@ -197,11 +375,11 @@ class Selector(object):
             right_prefix = right[right_last:right_next]
 
             new_ret = [
-                node + left_prefix + right_prefix + [merged]
+                node + left_prefix + right_prefix + (merged,)
                 for node in ret]
             if left_prefix and right_prefix:
                 new_ret.extend(
-                    node + right_prefix + left_prefix + [merged]
+                    node + right_prefix + left_prefix + (merged,)
                     for node in ret)
             ret = new_ret
 
@@ -223,23 +401,20 @@ class Selector(object):
         return ret
 
     def render(self):
-        return ''.join(''.join(node) for node in self._tree).lstrip()
+        return ' '.join(simple.render() for simple in self._tree).lstrip()
 
 
 def _merge_selector_nodes(a, b):
     # TODO document, turn me into a method on something
     # TODO what about combinators
-    aset = frozenset(a[1:])
-    bset = frozenset(b[1:])
-    if aset <= bset:
-        return a + [token for token in b[1:] if token not in aset]
-    elif bset <= aset:
-        return b + [token for token in a[1:] if token not in bset]
+    if a.is_superset_of(b):
+        return a.merge_with(b)
+    elif b.is_superset_of(a):
+        return b.merge_with(a)
     else:
         return None
 
 
-
 class LeastCommonSubsequencer(object):
     # http://en.wikipedia.org/wiki/Longest_common_subsequence_problem#Code_for_the_dynamic_programming_solution
     def __init__(self, a, b, eq=lambda a, b: a if a == b else None):
diff --git a/scss/tests/files/original-doctests/026-forum-2.css b/scss/tests/files/original-doctests/026-forum-2.css
index 4991b75..3246676 100644
--- a/scss/tests/files/original-doctests/026-forum-2.css
+++ b/scss/tests/files/original-doctests/026-forum-2.css
@@ -4,9 +4,9 @@
 *html .b {
   color: #000;
 }
-*:first-child+html .a {
+*:first-child + html .a {
   color: #fff;
 }
-*:first-child+html .b {
+*:first-child + html .b {
   color: #000;
 }
author	Eevee (Alex Munroe) <eevee.git@veekun.com>	2013-09-10 18:32:42 -0700
committer	Eevee (Alex Munroe) <eevee.git@veekun.com>	2013-09-10 18:32:42 -0700
commit	956e2513e7fbba0a1acbf3ce7bb3cf53922ce557 (patch)
tree	62089def7e44df6637a646a06e250faa0d61f28d
parent	adc605c7a883ef755c1e22270f5adfd9d09e936c (diff)
download	pyscss-956e2513e7fbba0a1acbf3ce7bb3cf53922ce557.tar.gz