From d044a672cb2824c47041c71ea53d56482517375e Mon Sep 17 00:00:00 2001 From: Anthon van der Neut Date: Mon, 5 Sep 2016 09:26:11 +0200 Subject: ignore full range of specified characters Fix for issue #55, incorporating pull request #11 both by Harguroicha Hsu --- README.rst | 4 ++++ __init__.py | 2 +- _test/test_comments.py | 22 +++++++++++++++++++++- reader.py | 7 ++++++- 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 57071da..b381532 100644 --- a/README.rst +++ b/README.rst @@ -18,6 +18,10 @@ ChangeLog :: + 0.12.8 (2016-09-xx): + - To be ignored Unicode characters were not properly regex matched + (no specific tests, PR by Haraguroicha Hsu) + 0.12.7 (2016-09-03): - fixing issue 54 empty lines with spaces (reported by Alex Harvey) diff --git a/__init__.py b/__init__.py index ff4b33c..728eee9 100644 --- a/__init__.py +++ b/__init__.py @@ -9,7 +9,7 @@ from __future__ import absolute_import _package_data = dict( full_package_name="ruamel.yaml", - version_info=(0, 12, 7), + version_info=(0, 12, 8), author="Anthon van der Neut", author_email="a.van.der.neut@ruamel.eu", description="ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order", # NOQA diff --git a/_test/test_comments.py b/_test/test_comments.py index 63895cc..0e52031 100644 --- a/_test/test_comments.py +++ b/_test/test_comments.py @@ -494,7 +494,7 @@ class TestCommentedMapMerge: class TestEmptyLines: # prompted by issue 46 from Alex Harvey - def Xtest_issue_46(self): + def test_issue_46(self): yaml_str = dedent("""\ --- # Please add key/value pairs in alphabetical order @@ -591,3 +591,23 @@ class TestEmptyLines: stripped += line.rstrip() + '\n' print(line + '$') assert stripped == y + + +class TestUnicodeComments: + def test_issue_55(self): # reported by Haraguroicha Hsu + round_trip("""\ + name: TEST + description: test using + author: Harguroicha + sql: + command: |- + select name from testtbl where no = :no + + ci-test: + - :no: 04043709 # 小花 + - :no: 05161690 # 茶 + - :no: 05293147 # 〇𤋥川 + - :no: 05338777 # 〇〇啓 + - :no: 05273867 # 〇 + - :no: 05205786 # 〇𤦌 + """) diff --git a/reader.py b/reader.py index 1257f52..fe6f505 100644 --- a/reader.py +++ b/reader.py @@ -145,7 +145,12 @@ class Reader(object): self.update(1) NON_PRINTABLE = re.compile( - u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]') + u'[^\x09\x0A\x0D\x20-\x7E\x85' + u'\xA0-\uD7FF' + u'\uE000-\uFFFD' + u'\U00010000-\U0010FFFF' + u']' + ) def check_printable(self, data): match = self.NON_PRINTABLE.search(data) -- cgit v1.2.1