2 files changed, 15 insertions, 2 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 9e9656ced6..63d084df73 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -825,6 +825,16 @@ class TestDetectEncoding(TestCase):
                 found, consumed_lines = detect_encoding(rl)
                 self.assertEqual(found, "iso-8859-1")
 
+    def test_syntaxerror_latin1(self):
+        # Issue 14629: need to raise SyntaxError if the first
+        # line(s) have non-UTF-8 characters
+        lines = (
+            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
+            )
+        readline = self.get_readline(lines)
+        self.assertRaises(SyntaxError, detect_encoding, readline)
+
+
     def test_utf8_normalization(self):
         # See get_normal_name() in tokenizer.c.
         encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index f575e9bc23..f283c6dd7f 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -292,9 +292,12 @@ def detect_encoding(readline):
 
     def find_cookie(line):
         try:
-            line_string = line.decode('ascii')
+            # Decode as UTF-8. Either the line is an encoding declaration,
+            # in which case it should be pure ASCII, or it must be UTF-8
+            # per default encoding.
+            line_string = line.decode('utf-8')
         except UnicodeDecodeError:
-            return None
+            raise SyntaxError("invalid or missing encoding declaration")
 
         matches = cookie_re.findall(line_string)
         if not matches: