Make the internals all use unicode, even on py2.

This was happening on py3 anyway, so might as well try to be Unicode-clean everywhere. The CSS charset detection is not actually implemented yet. Also, speedups are now totally broken on py2. Please hold.
author: Eevee (Alex Munroe) <eevee.git@veekun.com> 2014-03-29 16:47:01 -0700
committer: Eevee (Alex Munroe) <eevee.git@veekun.com> 2014-03-29 16:47:01 -0700
commit: 48507954f5806f073ad4d913edc919dc92d933e9 (patch)
tree: b08b5af77fddc35e4e06ec178283e916bce4dd14
parent: e97461c577435fcb1e19da39e19af48437ea591f (diff)
download: pyscss-48507954f5806f073ad4d913edc919dc92d933e9.tar.gz
7 files changed, 74 insertions, 13 deletions
diff --git a/scss/__init__.py b/scss/__init__.py
index 541dbb5..4fe72c3 100644
--- a/scss/__init__.py
+++ b/scss/__init__.py
@@ -36,6 +36,7 @@ xCSS:
 """
 from __future__ import absolute_import
 from __future__ import print_function
+from __future__ import unicode_literals
 
 from scss.scss_meta import BUILD_INFO, PROJECT, VERSION, REVISION, URL, AUTHOR, AUTHOR_EMAIL, LICENSE
 
@@ -47,6 +48,7 @@ __license__ = LICENSE
 
 from collections import defaultdict
 import glob
+from io import BytesIO
 from itertools import product
 import logging
 import warnings
@@ -59,6 +61,7 @@ import six
 from scss import config
 from scss.cssdefs import (
     SEPARATOR,
+    determine_encoding,
     _ml_comment_re, _sl_comment_re,
     _escape_chars_re,
     _spaces_re, _expand_rules_space_re, _collapse_properties_space_re,
@@ -131,8 +134,9 @@ _default_scss_vars = {
 
 
 class SourceFile(object):
-    def __init__(self, filename, contents, parent_dir='.', is_string=False, is_sass=None, line_numbers=True, line_strip=True):
+    def __init__(self, filename, contents, encoding=None, parent_dir='.', is_string=False, is_sass=None, line_numbers=True, line_strip=True):
         self.filename = filename
+        self.encoding = encoding
         self.sass = filename.endswith('.sass') if is_sass is None else is_sass
         self.line_numbers = line_numbers
         self.line_strip = line_strip
@@ -147,17 +151,32 @@ class SourceFile(object):
         )
 
     @classmethod
-    def from_filename(cls, fn, filename=None, is_sass=None, line_numbers=True):
+    def from_filename(cls, fn, filename=None, **kwargs):
         if filename is None:
             _, filename = os.path.split(fn)
 
         with open(fn) as f:
-            contents = f.read()
+            return cls.from_file(f, filename=filename, **kwargs)
 
-        return cls(filename, contents, is_sass=is_sass, line_numbers=line_numbers)
+    @classmethod
+    def from_file(cls, f, filename, **kwargs):
+        encoding = determine_encoding(f)
+        contents = f.read()  # bytes or text, depending on how f was opened
+        return cls(filename, contents, encoding=encoding, **kwargs)
 
     @classmethod
     def from_string(cls, string, filename=None, is_sass=None, line_numbers=True):
+        if isinstance(string, six.text_type):
+            # Already decoded; we don't know what encoding to use for output,
+            # though, so assume UTF-8
+            # TODO in this case we could still look for a @charset, right?
+            pass
+        elif isinstance(string, six.binary_type):
+            encoding = determine_encoding(BytesIO(string))
+            string = string.decode(encoding)
+        else:
+            raise TypeError("Expected a string, got {0!r}".format(string))
+
         if filename is None:
             filename = "<string %r...>" % string[:50]
 
diff --git a/scss/cssdefs.py b/scss/cssdefs.py
index 7b73805..bfd6b19 100644
--- a/scss/cssdefs.py
+++ b/scss/cssdefs.py
@@ -1,3 +1,6 @@
+"""Constants and functions defined by the CSS specification, not specific to
+Sass.
+"""
 from math import pi
 import re
 
@@ -339,6 +342,24 @@ def is_builtin_css_function(name):
     return False
 
 # ------------------------------------------------------------------------------
+# CSS character set determination
+# Based upon: http://www.w3.org/TR/CSS2/syndata.html#charset
+
+def determine_encoding(f):
+    """Return the appropriate encoding for the given file, according to the CSS
+    charset rules.
+
+    `f` should be a file-like object, opened in binary mode with the cursor at
+    the beginning.
+    """
+    # TODO: charset detection is not implemented yet, so `f` is never read.
+
+    # This is the ultimate default: just assume UTF-8
+    return "UTF-8"
+
+
+
+# ------------------------------------------------------------------------------
 # Bits and pieces of grammar, as regexen
 
 SEPARATOR = '\x00'
diff --git a/scss/expression.py b/scss/expression.py
index 0c889b1..33f0218 100644
--- a/scss/expression.py
+++ b/scss/expression.py
@@ -1,10 +1,12 @@
 from __future__ import absolute_import
 from __future__ import print_function
+from __future__ import unicode_literals
 
 from functools import partial
 import logging
 import operator
 import re
+from warnings import warn
 
 import six
 
@@ -53,7 +55,13 @@ class Calculator(object):
         syntactic unit and will not be re-evaluated.
         """
         # TODO that's a lie!  this should be in the parser for most cases.
-        cont = str(cont)
+        if not isinstance(cont, six.string_types):
+            warn(FutureWarning(
+                "do_glob_math was passed a non-string {0!r} "
+                "-- this will no longer be supported in pyScss 2.0"
+                .format(cont)
+            ))
+            cont = six.text_type(cont)
         if '#{' not in cont:
             return cont
         cont = _expr_glob_re.sub(self._pound_substitute, cont)
diff --git a/scss/tests/test_misc.py b/scss/tests/test_misc.py
index e8cdd28..beb231a 100644
--- a/scss/tests/test_misc.py
+++ b/scss/tests/test_misc.py
@@ -98,10 +98,11 @@ def test_extend_across_files():
 
 
 def test_unicode_files():
-    compiler = Scss()
+    compiler = Scss(scss_opts=dict(style='expanded'))
     unicode_input = u"""q {
   quotes: "“" "”" "‘" "’";
-}"""
+}
+"""
     output = compiler.compile(unicode_input)
 
     assert output == unicode_input
diff --git a/scss/tests/test_types.py b/scss/tests/test_types.py
index 026be02..f22bd0b 100644
--- a/scss/tests/test_types.py
+++ b/scss/tests/test_types.py
@@ -1,4 +1,7 @@
 """Tests for the type system."""
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import unicode_literals
 
 from scss.types import Color, Null, Number, String
 
@@ -36,7 +39,7 @@ def test_addition():
 
     ret = String('abc', quotes='"') + String('def', quotes=None)
     assert ret == String('abcdef')
-    assert ret.quotes is '"'
+    assert ret.quotes == '"'
 
     ret = String('abc', quotes=None) + String('def', quotes='"')
     assert ret == String('abcdef')
diff --git a/scss/types.py b/scss/types.py
index aafbab4..5e8f02a 100644
--- a/scss/types.py
+++ b/scss/types.py
@@ -1,9 +1,11 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+from __future__ import unicode_literals
 
 import colorsys
 import operator
+from warnings import warn
 
 import six
 
@@ -989,6 +991,11 @@ class String(Value):
             value = str(value)
 
         if isinstance(value, six.binary_type):
+            warn(FutureWarning(
+                "String got a bytes type {0!r} "
+                "-- this will no longer be supported in pyScss 2.0"
+                .format(value)
+            ))
             value = value.decode(DEFAULT_STRING_ENCODING)
 
         if not isinstance(value, six.text_type):
@@ -997,11 +1004,7 @@ class String(Value):
         # TODO probably disallow creating an unquoted string outside a
         # set of chars like [-a-zA-Z0-9]+
 
-        if six.PY3:
-            self.value = value
-        else:
-            # TODO well, at least 3 uses unicode everywhere
-            self.value = value.encode(DEFAULT_STRING_ENCODING)
+        self.value = value
         self.quotes = quotes
 
     @classmethod
diff --git a/scss/util.py b/scss/util.py
index 4ed4898..0cb5d20 100644
--- a/scss/util.py
+++ b/scss/util.py
@@ -1,5 +1,6 @@
 from __future__ import absolute_import
 from __future__ import print_function
+from __future__ import unicode_literals
 
 import base64
 import hashlib
@@ -106,6 +107,11 @@ def make_filename_hash(key):
     suitable for a filename.
     """
     key_repr = repr(key).encode('utf8')
+    # Hack: strip the u'' prefix Python 2 puts on unicode reprs, so the repr()s
+    # are the same on Python 2 and 3 and the test suite can run on both.
+    # TODO better solutions include: not using a repr, not embedding hashes in
+    # the expected test results.
+    key_repr = re.sub(b"\\bu'", b"'", key_repr)
     key_hash = hashlib.md5(key_repr).digest()
     return base64.b64encode(key_hash, b'__').decode('ascii').rstrip('=')
author	Eevee (Alex Munroe) <eevee.git@veekun.com>	2014-03-29 16:47:01 -0700
committer	Eevee (Alex Munroe) <eevee.git@veekun.com>	2014-03-29 16:47:01 -0700
commit	48507954f5806f073ad4d913edc919dc92d933e9 (patch)
tree	b08b5af77fddc35e4e06ec178283e916bce4dd14
parent	e97461c577435fcb1e19da39e19af48437ea591f (diff)
download	pyscss-48507954f5806f073ad4d913edc919dc92d933e9.tar.gz