From 1daf463ae09e3db7cb960775087f21ff5ce25dc6 Mon Sep 17 00:00:00 2001 From: Yu-Jie Lin Date: Sat, 21 Sep 2013 05:24:32 +0800 Subject: fix double hyphens inside a comment not invalidating the comment, add more tests In both HTML4 and HTML5, two consecutive hyphens should not (HTML4) or must not (HTML5) appear inside comments. This change makes sure that if they do appear inside a comment, the entire comment is treated as text, which means it will be converted. --- CHANGES.rst | 6 ++++-- smartypants.py | 24 ++++++++++++++++++++++-- tests/test.py | 36 +++++++++++++++++++++++++++++++++--- 3 files changed, 59 insertions(+), 7 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 8c7ba50..d62459e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -14,8 +14,10 @@ Versions without timestamps mean they are future releases. and ``processEscapes`` development: - - fix ``_tokenize`` turning ``--`` of HTML comment end tag ``-->`` - (pull request #1) + - fix ``_tokenize`` can not handle HTML comment properly + + This fix includes pull request #1 with modification for handling + ``--`` appears in a comment, which makes the comment not a comment. 1.8.2: 2013-08-28T11:38:42Z - add documentation generation diff --git a/smartypants.py b/smartypants.py index f8d56f3..1580d26 100755 --- a/smartypants.py +++ b/smartypants.py @@ -709,7 +709,7 @@ def _tokenize(text): tokens = [] - tag_soup = re.compile(r"""(?s)([^<]*)("), "—>") - self.assertEqual(sp(""), "") - self.assertEqual(sp(""), - "") + self.assertEqual(sp("-- \t >"), "— \t >") + + TEXT = ' blah--blah ' + T = sp(TEXT) + E = ' blah—blah ' + self.assertEqual(T, E) + + TEXT = ( + '

foo -- "bar"\n' + '

' + ) + + T = sp(TEXT) + E = ( + '

foo — “bar”\n' + '

' + ) + self.assertEqual(T, E) + + # nothing should be converted + for TEXT in ('', + '', + ' '): + self.assertEqual(sp(TEXT), TEXT) + + # not comments + self.assertEqual(sp(''), '') + self.assertEqual(sp('