summaryrefslogtreecommitdiff
path: root/scss/source.py
blob: 74a6937e453f1daaf21fd95501db53590df48740 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division

import hashlib
import logging
import os
import re

import six

from scss.cssdefs import (
    _ml_comment_re, _sl_comment_re,
    _expand_rules_space_re, _collapse_properties_space_re,
    _strings_re,
)
from scss.cssdefs import determine_encoding


log = logging.getLogger(__name__)


# Placeholder token -> the literal CSS punctuation it stands in for.  The
# tokens are substituted for the punctuation while comments are stripped, and
# swapped back afterwards.
_safe_strings = {
    '^doubleslash^': '//',
    '^bigcopen^': '/*',
    '^bigcclose^': '*/',
    '^doubledot^': ':',
    '^semicolon^': ';',
    '^curlybracketopen^': '{',
    '^curlybracketclosed^': '}',
}

# Inverse mapping: literal punctuation -> placeholder token.
_reverse_safe_strings = dict(
    (literal, token) for token, literal in _safe_strings.items())

# Matches any single placeholder token.
_safe_strings_re = re.compile(
    '|'.join(re.escape(token) for token in _safe_strings))

# Matches any single piece of literal punctuation that has a placeholder.
_reverse_safe_strings_re = re.compile(
    '|'.join(re.escape(literal) for literal in _reverse_safe_strings))


class SourceFile(object):
    """A single input file to be fed to the compiler.  Detects the encoding
    (according to CSS spec rules) and performs some light pre-processing.

    Two instances compare equal, and hash identically, when their ``path``
    attributes are equal.
    """

    path = None
    """For "real" files, an absolute path to the original source file.  For ad
    hoc strings, some other kind of identifier.  This is used as a hash key and
    a test of equality, so it MUST be unique!
    """

    def __init__(
            self, path, contents, encoding=None,
            is_real_file=True, is_sass=None):
        """Not normally used.  See the three alternative constructors:
        :func:`SourceFile.from_file`, :func:`SourceFile.from_filename`, and
        :func:`SourceFile.from_string`.

        :param path: identifier for this source; must be absolute when
            ``is_real_file`` is true.
        :param contents: the already-decoded source text.
        :param encoding: stored as-is on the instance.
        :param is_real_file: whether ``path`` names an actual file.
        :param is_sass: whether to pre-process as indented Sass syntax.  When
            ``None``, this is true only if ``path`` ends in ``.sass``.
        :raises TypeError: if ``contents`` is not text.
        :raises ValueError: if ``is_real_file`` is true and ``path`` is not
            absolute.
        """
        if not isinstance(contents, six.text_type):
            # The check above is for decoded text, so the message has to ask
            # for text as well.
            raise TypeError(
                "Expected text for 'contents', got {0}"
                .format(type(contents)))

        if is_real_file and not os.path.isabs(path):
            raise ValueError(
                "Expected an absolute path for 'path', got {0!r}"
                .format(path))

        self.path = path
        self.encoding = encoding
        self.is_real_file = is_real_file
        if is_sass is None:
            # TODO autodetect from the contents if the extension is bogus or
            # missing?
            self.is_sass = os.path.splitext(path)[1] == '.sass'
        else:
            self.is_sass = is_sass
        # prepare_source() reads self.is_sass, so it must run last.
        self.contents = self.prepare_source(contents)

    def __repr__(self):
        return "<{0} {1!r}>".format(type(self).__name__, self.path)

    def __hash__(self):
        # Kept consistent with __eq__: identity is defined by the path alone.
        return hash(self.path)

    def __eq__(self, other):
        if self is other:
            return True

        if not isinstance(other, SourceFile):
            return NotImplemented

        return self.path == other.path

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        return not self == other

    @classmethod
    def from_filename(cls, fn, path=None, **kwargs):
        """Read Sass source from a file on disk.

        :param fn: name of the file to open.
        :param path: identifier to record for the source; defaults to ``fn``.
        :param kwargs: passed through to :func:`SourceFile.from_file`.
        """
        # Open in binary mode so we can reliably detect the encoding
        with open(fn, 'rb') as f:
            return cls.from_file(f, path=path or fn, **kwargs)

    @classmethod
    def from_file(cls, f, path=None, **kwargs):
        """Read Sass source from a file or file-like object.

        :param f: object with a ``read()`` method; bytes results are decoded
            using the detected encoding.
        :param path: identifier to record for the source.  When ``None``,
            ``f.name`` is used if present, otherwise ``repr(f)``.  A given
            path that exists on disk is made absolute and normalized, and the
            source is then marked as a real file.
        :param kwargs: passed through to the constructor.
        """
        contents = f.read()
        encoding = determine_encoding(contents)
        if isinstance(contents, six.binary_type):
            contents = contents.decode(encoding)

        is_real_file = False
        if path is None:
            path = getattr(f, 'name', repr(f))
        elif os.path.exists(path):
            path = os.path.normpath(os.path.abspath(path))
            is_real_file = True

        return cls(
            path, contents, encoding=encoding, is_real_file=is_real_file,
            **kwargs)

    @classmethod
    def from_string(cls, string, path=None, encoding=None, is_sass=None):
        """Read Sass source from the contents of a string.

        :param string: the source, as either text or bytes.
        :param path: identifier to record for the source.  When ``None``, an
            identifier of the form ``string:<sha256 hex digest>`` is derived
            from the encoded contents.  A given path that exists on disk is
            made absolute and normalized, and the source is then marked as a
            real file.
        :param encoding: only consulted for text input; bytes input always
            has its encoding detected.
        :param is_sass: passed through to the constructor.
        :raises TypeError: if ``string`` is neither text nor bytes.
        """
        if isinstance(string, six.text_type):
            # Already decoded; we don't know what encoding to use for output,
            # though, so still check for a @charset.
            # TODO what if the given encoding conflicts with the one in the
            # file?  do we care?
            if encoding is None:
                encoding = determine_encoding(string)

            byte_contents = string.encode(encoding)
            text_contents = string
        elif isinstance(string, six.binary_type):
            encoding = determine_encoding(string)
            byte_contents = string
            text_contents = string.decode(encoding)
        else:
            raise TypeError("Expected text or bytes, got {0!r}".format(string))

        is_real_file = False
        if path is None:
            # No name was given, so identify the source by its contents.
            m = hashlib.sha256()
            m.update(byte_contents)
            path = 'string:' + m.hexdigest()
        elif os.path.exists(path):
            path = os.path.normpath(os.path.abspath(path))
            is_real_file = True

        return cls(
            path, text_contents, encoding=encoding, is_real_file=is_real_file,
            is_sass=is_sass,
        )

    @staticmethod
    def _join_continuation(line, state):
        """Apply backslash line-continuation to one physical line.

        Trailing whitespace is stripped and any text buffered from earlier
        continued lines is prepended.  Returns the complete logical line, or
        ``None`` if the line ends in a backslash and was buffered in
        ``state['line_buffer']`` to be joined with the next one.  ``line``
        may be ``None``, which is treated as an empty line.
        """
        if line is None:
            line = ''

        line = state['line_buffer'] + line.rstrip()  # remove EOL character

        if line.endswith('\\'):
            state['line_buffer'] = line[:-1]
            return None

        state['line_buffer'] = ''
        return line

    def parse_scss_line(self, line_no, line, state):
        """Feed one line of SCSS source through the line pre-processor.

        Output lags one line behind input: the given line is stored in
        ``state['prev_line']`` and the previously stored line is returned,
        stripped and newline-terminated.  Returns ``''`` when the previous
        line was blank or when the given line was buffered as a continuation.
        """
        line = self._join_continuation(line, state)
        if line is None:
            return ''

        output = state['prev_line'].strip()

        state['prev_line'] = line
        state['prev_line_no'] = line_no

        return output + '\n' if output else ''

    def parse_sass_line(self, line_no, line, state):
        """Feed one line of indented Sass source through the pre-processor.

        Like :func:`parse_scss_line`, output lags one line behind input.  The
        previous line is terminated with ``;``, `` {`` or `` }`` according to
        how the indentation of the given line compares with it, then returned
        stripped and newline-terminated (or ``''`` if there is nothing to
        emit).

        Note that a blank line has an indent of zero, so it closes every
        block that is currently open.
        """
        line = self._join_continuation(line, state)
        if line is None:
            return ''

        indent = len(line) - len(line.lstrip())

        # make sure we support multi-space indent as long as indent is
        # consistent: the first indented line defines the width of one level
        if indent and not state['indent_marker']:
            state['indent_marker'] = indent

        if state['indent_marker']:
            indent //= state['indent_marker']

        if indent == state['prev_indent']:
            # same indentation as previous line
            if state['prev_line']:
                state['prev_line'] += ';'
        elif indent > state['prev_indent']:
            # new indentation is greater than previous, we just entered a new
            # block
            state['prev_line'] += ' {'
            state['nested_blocks'] += 1
        else:
            # indentation is reset, we exited one or more blocks
            block_diff = state['prev_indent'] - indent
            if state['prev_line']:
                state['prev_line'] += ';'
            state['prev_line'] += ' }' * block_diff
            state['nested_blocks'] -= block_diff

        output = state['prev_line'].strip()

        state['prev_indent'] = indent
        state['prev_line'] = line
        state['prev_line_no'] = line_no

        return output + '\n' if output else ''

    def _fold_lines(self, codestr):
        """Run every line of ``codestr`` through the per-line parser.

        Uses :func:`parse_sass_line` when ``self.is_sass`` is true and
        :func:`parse_scss_line` otherwise.  Returns the concatenated output,
        in which lines are stripped, blank lines are dropped and
        backslash-continued lines are joined.
        """
        state = {
            'line_buffer': '',
            'prev_line': '',
            'prev_line_no': 0,
            'prev_indent': 0,
            'nested_blocks': 0,
            'indent_marker': 0,
        }
        if self.is_sass:
            parse_line = self.parse_sass_line
        else:
            parse_line = self.parse_scss_line

        chunks = []
        for line_no, line in enumerate(codestr.splitlines()):
            chunks.append(parse_line(line_no, line, state))

        # The parsers lag one line behind, so flush the line still held in
        # prev_line.  If the source ended in a backslash continuation, the
        # first flush only moves the buffered text into prev_line, so keep
        # flushing until nothing is pending; otherwise the final line would
        # be lost.
        chunks.append(parse_line(None, None, state))
        while state['line_buffer'] or state['prev_line']:
            chunks.append(parse_line(None, None, state))

        return ''.join(chunks)

    def prepare_source(self, codestr, sass=False):
        """Pre-process raw source text and return the result.

        Lines are stripped and joined (translating indented Sass syntax when
        ``self.is_sass`` is true), comments are removed, and the whitespace
        before opening braces is normalized.

        :param codestr: the source text.
        :param sass: unused; the syntax is chosen by ``self.is_sass``.  Kept
            so that existing callers passing it keep working.
        """
        codestr = self._fold_lines(codestr)

        # protects codestr: "..." strings -- within each match, comment and
        # block punctuation is swapped for placeholder tokens so that the
        # comment removal below leaves it alone
        codestr = _strings_re.sub(
            lambda m: _reverse_safe_strings_re.sub(
                lambda n: _reverse_safe_strings[n.group(0)], m.group(0)),
            codestr)

        # removes multiple line comments
        codestr = _ml_comment_re.sub('', codestr)

        # removes inline comments, but not :// (protocol)
        codestr = _sl_comment_re.sub('', codestr)

        # swap the placeholder tokens back to the punctuation they stand for
        codestr = _safe_strings_re.sub(
            lambda m: _safe_strings[m.group(0)], codestr)

        # expand the space in rules
        codestr = _expand_rules_space_re.sub(' {', codestr)

        # collapse the space in properties blocks
        codestr = _collapse_properties_space_re.sub(r'\1{', codestr)

        return codestr