diff options
Diffstat (limited to 'Lib/test/test_tokenize.py')
-rw-r--r-- | Lib/test/test_tokenize.py | 41 |
1 files changed, 30 insertions, 11 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 510e632565..11e6fb4596 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - doctests = """ Tests for the tokenize module. @@ -42,7 +40,7 @@ brevity. ... IndentationError: unindent does not match any outer indentation level -There are some standard formattig practises that are easy to get right. +There are some standard formatting practices that are easy to get right. >>> roundtrip("if x == 1:\\n" ... " print(x)\\n") @@ -566,7 +564,8 @@ Non-ascii identifiers from test import support from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP, - STRING, ENDMARKER, tok_name, detect_encoding) + STRING, ENDMARKER, tok_name, detect_encoding, + open as tokenize_open) from io import BytesIO from unittest import TestCase import os, sys, glob @@ -613,11 +612,11 @@ def decistmt(s): The format of the exponent is inherited from the platform C library. Known cases are "e-007" (Windows) and "e-07" (not Windows). Since - we're only showing 12 digits, and the 13th isn't close to 5, the + we're only showing 11 digits, and the 12th isn't close to 5, the rest of the output should be platform-independent. >>> exec(s) #doctest: +ELLIPSIS - -3.21716034272e-0...7 + -3.2171603427...e-0...7 Output from calculations with Decimal should be identical across all platforms. @@ -741,7 +740,7 @@ class TestDetectEncoding(TestCase): b'do_something(else)\n' ) encoding, consumed_lines = detect_encoding(self.get_readline(lines)) - self.assertEqual(encoding, 'utf-8') + self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, [b'# something\n', b'print(something)\n']) @@ -762,7 +761,7 @@ class TestDetectEncoding(TestCase): b'do_something(else)\n' ) encoding, consumed_lines = detect_encoding(self.get_readline(lines)) - self.assertEqual(encoding, 'utf-8') + self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, [b'# coding=utf-8\n']) def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self): @@ -794,7 +793,7 @@ class TestDetectEncoding(TestCase): b'do_something(else)\n' ) encoding, consumed_lines = detect_encoding(self.get_readline(lines)) - self.assertEqual(encoding, 'utf-8') + self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, [b'#! something\n', b'f# coding=utf-8\n']) @@ -848,17 +847,37 @@ class TestDetectEncoding(TestCase): readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',)) encoding, consumed_lines = detect_encoding(readline) - self.assertEqual(encoding, 'utf-8') + self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, [b'print(something)\n']) readline = self.get_readline((b'\xef\xbb\xbf',)) encoding, consumed_lines = detect_encoding(readline) - self.assertEqual(encoding, 'utf-8') + self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, []) readline = self.get_readline((b'# coding: bad\n',)) self.assertRaises(SyntaxError, detect_encoding, readline) + def test_open(self): + filename = support.TESTFN + '.py' + self.addCleanup(support.unlink, filename) + + # test coding cookie + for encoding in ('iso-8859-15', 'utf-8'): + with open(filename, 'w', encoding=encoding) as fp: + print("# coding: %s" % encoding, file=fp) + print("print('euro:\u20ac')", file=fp) + with tokenize_open(filename) as fp: + self.assertEqual(fp.encoding, encoding) + self.assertEqual(fp.mode, 'r') + + # test BOM (no coding cookie) + with open(filename, 'w', encoding='utf-8-sig') as fp: + print("print('euro:\u20ac')", file=fp) + with tokenize_open(filename) as fp: + self.assertEqual(fp.encoding, 'utf-8-sig') + self.assertEqual(fp.mode, 'r') + class TestTokenize(TestCase): def test_tokenize(self): |