summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Eevee (Alex Munroe) <eevee.git@veekun.com> 2014-03-29 16:47:01 -0700
committer Eevee (Alex Munroe) <eevee.git@veekun.com> 2014-03-29 16:47:01 -0700
commit 48507954f5806f073ad4d913edc919dc92d933e9 (patch)
tree b08b5af77fddc35e4e06ec178283e916bce4dd14
parent e97461c577435fcb1e19da39e19af48437ea591f (diff)
download pyscss-48507954f5806f073ad4d913edc919dc92d933e9.tar.gz
Make the internals all use unicode, even on py2.
This was happening on py3 anyway, so might as well try to be Unicode-clean everywhere. The CSS charset detection is not actually implemented yet. Also, speedups are now totally broken on py2. Please hold.
-rw-r--r-- scss/__init__.py 27
-rw-r--r-- scss/cssdefs.py 21
-rw-r--r-- scss/expression.py 10
-rw-r--r-- scss/tests/test_misc.py 5
-rw-r--r-- scss/tests/test_types.py 5
-rw-r--r-- scss/types.py 13
-rw-r--r-- scss/util.py 6
7 files changed, 74 insertions, 13 deletions
diff --git a/scss/__init__.py b/scss/__init__.py
index 541dbb5..4fe72c3 100644
--- a/scss/__init__.py
+++ b/scss/__init__.py
@@ -36,6 +36,7 @@ xCSS:
"""
from __future__ import absolute_import
from __future__ import print_function
+from __future__ import unicode_literals
from scss.scss_meta import BUILD_INFO, PROJECT, VERSION, REVISION, URL, AUTHOR, AUTHOR_EMAIL, LICENSE
@@ -47,6 +48,7 @@ __license__ = LICENSE
from collections import defaultdict
import glob
+from io import BytesIO
from itertools import product
import logging
import warnings
@@ -59,6 +61,7 @@ import six
from scss import config
from scss.cssdefs import (
SEPARATOR,
+ determine_encoding,
_ml_comment_re, _sl_comment_re,
_escape_chars_re,
_spaces_re, _expand_rules_space_re, _collapse_properties_space_re,
@@ -131,8 +134,9 @@ _default_scss_vars = {
class SourceFile(object):
- def __init__(self, filename, contents, parent_dir='.', is_string=False, is_sass=None, line_numbers=True, line_strip=True):
+ def __init__(self, filename, contents, encoding=None, parent_dir='.', is_string=False, is_sass=None, line_numbers=True, line_strip=True):
self.filename = filename
+ self.encoding = encoding
self.sass = filename.endswith('.sass') if is_sass is None else is_sass
self.line_numbers = line_numbers
self.line_strip = line_strip
@@ -147,17 +151,32 @@ class SourceFile(object):
)
@classmethod
- def from_filename(cls, fn, filename=None, is_sass=None, line_numbers=True):
+ def from_filename(cls, fn, filename=None, **kwargs):
if filename is None:
_, filename = os.path.split(fn)
with open(fn) as f:
- contents = f.read()
+ return cls.from_file(f, filename=filename, **kwargs)
- return cls(filename, contents, is_sass=is_sass, line_numbers=line_numbers)
+ @classmethod
+ def from_file(cls, f, filename, **kwargs):
+ encoding = determine_encoding(f)
+
+ return cls(filename, contents, encoding=encoding, **kwargs)
@classmethod
def from_string(cls, string, filename=None, is_sass=None, line_numbers=True):
+ if isinstance(string, six.text_type):
+ # Already decoded; we don't know what encoding to use for output,
+ # though, so assume UTF-8
+ # TODO in this case we could still look for a @charset, right?
+ pass
+ elif isinstance(string, six.binary_type):
+ encoding = determine_encoding(BytesIO(string))
+ string = string.decode(encoding)
+ else:
+ raise TypeError("Expected a string, got {0!r}".format(string))
+
if filename is None:
filename = "<string %r...>" % string[:50]
diff --git a/scss/cssdefs.py b/scss/cssdefs.py
index 7b73805..bfd6b19 100644
--- a/scss/cssdefs.py
+++ b/scss/cssdefs.py
@@ -1,3 +1,6 @@
+"""Constants and functions defined by the CSS specification, not specific to
+Sass.
+"""
from math import pi
import re
@@ -339,6 +342,24 @@ def is_builtin_css_function(name):
return False
# ------------------------------------------------------------------------------
+# CSS character set determination
+# Based upon: http://www.w3.org/TR/CSS2/syndata.html#charset
+
+def determine_encoding(f):
+ """Return the appropriate encoding for the given file, according to the CSS
+ charset rules.
+
+ `f` should be a file-like object, opened in binary mode with the cursor at
+ the beginning.
+ """
+ # TODO haha.
+
+ # This is the ultimate default: just assume UTF-8
+ return "UTF-8"
+
+
+
+# ------------------------------------------------------------------------------
# Bits and pieces of grammar, as regexen
SEPARATOR = '\x00'
diff --git a/scss/expression.py b/scss/expression.py
index 0c889b1..33f0218 100644
--- a/scss/expression.py
+++ b/scss/expression.py
@@ -1,10 +1,12 @@
from __future__ import absolute_import
from __future__ import print_function
+from __future__ import unicode_literals
from functools import partial
import logging
import operator
import re
+from warnings import warn
import six
@@ -53,7 +55,13 @@ class Calculator(object):
syntactic unit and will not be re-evaluated.
"""
# TODO that's a lie! this should be in the parser for most cases.
- cont = str(cont)
+ if not isinstance(cont, six.string_types):
+ warn(FutureWarning(
+ "do_glob_math was passed a non-string {0!r} "
+ "-- this will no longer be supported in pyScss 2.0"
+ .format(cont)
+ ))
+ cont = six.text_type(cont)
if '#{' not in cont:
return cont
cont = _expr_glob_re.sub(self._pound_substitute, cont)
diff --git a/scss/tests/test_misc.py b/scss/tests/test_misc.py
index e8cdd28..beb231a 100644
--- a/scss/tests/test_misc.py
+++ b/scss/tests/test_misc.py
@@ -98,10 +98,11 @@ def test_extend_across_files():
def test_unicode_files():
- compiler = Scss()
+ compiler = Scss(scss_opts=dict(style='expanded'))
unicode_input = u"""q {
quotes: "“" "”" "‘" "’";
-}"""
+}
+"""
output = compiler.compile(unicode_input)
assert output == unicode_input
diff --git a/scss/tests/test_types.py b/scss/tests/test_types.py
index 026be02..f22bd0b 100644
--- a/scss/tests/test_types.py
+++ b/scss/tests/test_types.py
@@ -1,4 +1,7 @@
"""Tests for the type system."""
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import unicode_literals
from scss.types import Color, Null, Number, String
@@ -36,7 +39,7 @@ def test_addition():
ret = String('abc', quotes='"') + String('def', quotes=None)
assert ret == String('abcdef')
- assert ret.quotes is '"'
+ assert ret.quotes == '"'
ret = String('abc', quotes=None) + String('def', quotes='"')
assert ret == String('abcdef')
diff --git a/scss/types.py b/scss/types.py
index aafbab4..5e8f02a 100644
--- a/scss/types.py
+++ b/scss/types.py
@@ -1,9 +1,11 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+from __future__ import unicode_literals
import colorsys
import operator
+from warnings import warn
import six
@@ -989,6 +991,11 @@ class String(Value):
value = str(value)
if isinstance(value, six.binary_type):
+ warn(FutureWarning(
+ "String got a bytes type {0!r} "
+ "-- this will no longer be supported in pyScss 2.0"
+ .format(value)
+ ))
value = value.decode(DEFAULT_STRING_ENCODING)
if not isinstance(value, six.text_type):
@@ -997,11 +1004,7 @@ class String(Value):
# TODO probably disallow creating an unquoted string outside a
# set of chars like [-a-zA-Z0-9]+
- if six.PY3:
- self.value = value
- else:
- # TODO well, at least 3 uses unicode everywhere
- self.value = value.encode(DEFAULT_STRING_ENCODING)
+ self.value = value
self.quotes = quotes
@classmethod
diff --git a/scss/util.py b/scss/util.py
index 4ed4898..0cb5d20 100644
--- a/scss/util.py
+++ b/scss/util.py
@@ -1,5 +1,6 @@
from __future__ import absolute_import
from __future__ import print_function
+from __future__ import unicode_literals
import base64
import hashlib
@@ -106,6 +107,11 @@ def make_filename_hash(key):
suitable for a filename.
"""
key_repr = repr(key).encode('utf8')
+ # This is really stupid but necessary for making the repr()s be the same on
+ # Python 2 and 3 and thus allowing the test suite to run on both.
+ # TODO better solutions include: not using a repr, not embedding hashes in
+ # the expected test results
+ key_repr = re.sub(b"\\bu'", b"'", key_repr)
key_hash = hashlib.md5(key_repr).digest()
return base64.b64encode(key_hash, b'__').decode('ascii').rstrip('=')