diff options
author | Eevee (Alex Munroe) <eevee.git@veekun.com> | 2014-03-29 16:47:01 -0700 |
---|---|---|
committer | Eevee (Alex Munroe) <eevee.git@veekun.com> | 2014-03-29 16:47:01 -0700 |
commit | 48507954f5806f073ad4d913edc919dc92d933e9 (patch) | |
tree | b08b5af77fddc35e4e06ec178283e916bce4dd14 | |
parent | e97461c577435fcb1e19da39e19af48437ea591f (diff) | |
download | pyscss-48507954f5806f073ad4d913edc919dc92d933e9.tar.gz |
Make the internals all use unicode, even on py2.
This was happening on py3 anyway, so might as well try to be
Unicode-clean everywhere.
The CSS charset detection is not actually implemented yet: determine_encoding() is a stub that always returns "UTF-8".
Also, speedups are now totally broken on py2. Please hold.
-rw-r--r-- | scss/__init__.py | 27 | ||||
-rw-r--r-- | scss/cssdefs.py | 21 | ||||
-rw-r--r-- | scss/expression.py | 10 | ||||
-rw-r--r-- | scss/tests/test_misc.py | 5 | ||||
-rw-r--r-- | scss/tests/test_types.py | 5 | ||||
-rw-r--r-- | scss/types.py | 13 | ||||
-rw-r--r-- | scss/util.py | 6 |
7 files changed, 74 insertions, 13 deletions
diff --git a/scss/__init__.py b/scss/__init__.py index 541dbb5..4fe72c3 100644 --- a/scss/__init__.py +++ b/scss/__init__.py @@ -36,6 +36,7 @@ xCSS: """ from __future__ import absolute_import from __future__ import print_function +from __future__ import unicode_literals from scss.scss_meta import BUILD_INFO, PROJECT, VERSION, REVISION, URL, AUTHOR, AUTHOR_EMAIL, LICENSE @@ -47,6 +48,7 @@ __license__ = LICENSE from collections import defaultdict import glob +from io import BytesIO from itertools import product import logging import warnings @@ -59,6 +61,7 @@ import six from scss import config from scss.cssdefs import ( SEPARATOR, + determine_encoding, _ml_comment_re, _sl_comment_re, _escape_chars_re, _spaces_re, _expand_rules_space_re, _collapse_properties_space_re, @@ -131,8 +134,9 @@ _default_scss_vars = { class SourceFile(object): - def __init__(self, filename, contents, parent_dir='.', is_string=False, is_sass=None, line_numbers=True, line_strip=True): + def __init__(self, filename, contents, encoding=None, parent_dir='.', is_string=False, is_sass=None, line_numbers=True, line_strip=True): self.filename = filename + self.encoding = encoding self.sass = filename.endswith('.sass') if is_sass is None else is_sass self.line_numbers = line_numbers self.line_strip = line_strip @@ -147,17 +151,32 @@ class SourceFile(object): ) @classmethod - def from_filename(cls, fn, filename=None, is_sass=None, line_numbers=True): + def from_filename(cls, fn, filename=None, **kwargs): if filename is None: _, filename = os.path.split(fn) with open(fn) as f: - contents = f.read() + return cls.from_file(f, filename=filename, **kwargs) - return cls(filename, contents, is_sass=is_sass, line_numbers=line_numbers) + @classmethod + def from_file(cls, f, filename, **kwargs): + encoding = determine_encoding(f) + + return cls(filename, contents, encoding=encoding, **kwargs) @classmethod def from_string(cls, string, filename=None, is_sass=None, line_numbers=True): + if isinstance(string, 
six.text_type): + # Already decoded; we don't know what encoding to use for output, + # though, so assume UTF-8 + # TODO in this case we could still look for a @charset, right? + pass + elif isinstance(string, six.binary_type): + encoding = determine_encoding(BytesIO(string)) + string = string.decode(encoding) + else: + raise TypeError("Expected a string, got {0!r}".format(string)) + if filename is None: filename = "<string %r...>" % string[:50] diff --git a/scss/cssdefs.py b/scss/cssdefs.py index 7b73805..bfd6b19 100644 --- a/scss/cssdefs.py +++ b/scss/cssdefs.py @@ -1,3 +1,6 @@ +"""Constants and functions defined by the CSS specification, not specific to +Sass. +""" from math import pi import re @@ -339,6 +342,24 @@ def is_builtin_css_function(name): return False # ------------------------------------------------------------------------------ +# CSS character set determination +# Based upon: http://www.w3.org/TR/CSS2/syndata.html#charset + +def determine_encoding(f): + """Return the appropriate encoding for the given file, according to the CSS + charset rules. + + `f` should be a file-like object, opened in binary mode with the cursor at + the beginning. + """ + # TODO haha. + + # This is the ultimate default: just assume UTF-8 + return "UTF-8" + + + +# ------------------------------------------------------------------------------ # Bits and pieces of grammar, as regexen SEPARATOR = '\x00' diff --git a/scss/expression.py b/scss/expression.py index 0c889b1..33f0218 100644 --- a/scss/expression.py +++ b/scss/expression.py @@ -1,10 +1,12 @@ from __future__ import absolute_import from __future__ import print_function +from __future__ import unicode_literals from functools import partial import logging import operator import re +from warnings import warn import six @@ -53,7 +55,13 @@ class Calculator(object): syntactic unit and will not be re-evaluated. """ # TODO that's a lie! this should be in the parser for most cases. 
- cont = str(cont) + if not isinstance(cont, six.string_types): + warn(FutureWarning( + "do_glob_math was passed a non-string {0!r} " + "-- this will no longer be supported in pyScss 2.0" + .format(cont) + )) + cont = six.text_type(cont) if '#{' not in cont: return cont cont = _expr_glob_re.sub(self._pound_substitute, cont) diff --git a/scss/tests/test_misc.py b/scss/tests/test_misc.py index e8cdd28..beb231a 100644 --- a/scss/tests/test_misc.py +++ b/scss/tests/test_misc.py @@ -98,10 +98,11 @@ def test_extend_across_files(): def test_unicode_files(): - compiler = Scss() + compiler = Scss(scss_opts=dict(style='expanded')) unicode_input = u"""q { quotes: "“" "”" "‘" "’"; -}""" +} +""" output = compiler.compile(unicode_input) assert output == unicode_input diff --git a/scss/tests/test_types.py b/scss/tests/test_types.py index 026be02..f22bd0b 100644 --- a/scss/tests/test_types.py +++ b/scss/tests/test_types.py @@ -1,4 +1,7 @@ """Tests for the type system.""" +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals from scss.types import Color, Null, Number, String @@ -36,7 +39,7 @@ def test_addition(): ret = String('abc', quotes='"') + String('def', quotes=None) assert ret == String('abcdef') - assert ret.quotes is '"' + assert ret.quotes == '"' ret = String('abc', quotes=None) + String('def', quotes='"') assert ret == String('abcdef') diff --git a/scss/types.py b/scss/types.py index aafbab4..5e8f02a 100644 --- a/scss/types.py +++ b/scss/types.py @@ -1,9 +1,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from __future__ import unicode_literals import colorsys import operator +from warnings import warn import six @@ -989,6 +991,11 @@ class String(Value): value = str(value) if isinstance(value, six.binary_type): + warn(FutureWarning( + "String got a bytes type {0!r} " + "-- this will no longer be supported in pyScss 2.0" + .format(value) + 
)) value = value.decode(DEFAULT_STRING_ENCODING) if not isinstance(value, six.text_type): @@ -997,11 +1004,7 @@ class String(Value): # TODO probably disallow creating an unquoted string outside a # set of chars like [-a-zA-Z0-9]+ - if six.PY3: - self.value = value - else: - # TODO well, at least 3 uses unicode everywhere - self.value = value.encode(DEFAULT_STRING_ENCODING) + self.value = value self.quotes = quotes @classmethod diff --git a/scss/util.py b/scss/util.py index 4ed4898..0cb5d20 100644 --- a/scss/util.py +++ b/scss/util.py @@ -1,5 +1,6 @@ from __future__ import absolute_import from __future__ import print_function +from __future__ import unicode_literals import base64 import hashlib @@ -106,6 +107,11 @@ def make_filename_hash(key): suitable for a filename. """ key_repr = repr(key).encode('utf8') + # This is really stupid but necessary for making the repr()s be the same on + # Python 2 and 3 and thus allowing the test suite to run on both. + # TODO better solutions include: not using a repr, not embedding hashes in + # the expected test results + key_repr = re.sub(b"\\bu'", b"'", key_repr) key_hash = hashlib.md5(key_repr).digest() return base64.b64encode(key_hash, b'__').decode('ascii').rstrip('=') |