summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Anthon van der Neut <anthon@mnt.org>, 2017-04-18 18:44:50 +0200
committer: Anthon van der Neut <anthon@mnt.org>, 2017-04-18 18:44:50 +0200
commit: a00c08eaa3b16d2c29c5179e69bb7e7120c31049 (patch)
tree: 6d68c7b67eddcb3bef1903169a54aaace81bafb6
parent: 1742d6cbadfadd6f4e16fd0c1c589ae301151c75 (diff)
download: ruamel.yaml-0.14.7.tar.gz
fix issue #115: preserve _ and leading 0s in integers (tag: 0.14.7)
-rw-r--r-- CHANGES 7
-rw-r--r-- README.rst 7
-rw-r--r-- __init__.py 4
-rw-r--r-- _test/test_int.py 61
-rw-r--r-- constructor.py 49
-rw-r--r-- representer.py 64
-rw-r--r-- scalarint.py 68
7 files changed, 213 insertions, 47 deletions
diff --git a/CHANGES b/CHANGES
index 063576e..2bd6d23 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,10 @@
+[0, 14, 7]: 2017-04-18
+ - round trip of integers (decimal, octal, hex, binary) now preserves
+ leading zero(s) padding and underscores. Underscores are presumed
+ to be at regular distances (e.g. ``0o12_345_67`` dumps back as
+ ``0o1_23_45_67``, because the distance from the last digit to the
+ underscore preceding it is the determining factor).
+
[0, 14, 6]: 2017-04-14
- binary, octal and hex integers are now preserved by default. This
was a known deficiency. Working on this was prompted by the issue report (112)
diff --git a/README.rst b/README.rst
index ee43342..fcdefc6 100644
--- a/README.rst
+++ b/README.rst
@@ -18,6 +18,13 @@ ChangeLog
.. should insert NEXT: at the beginning of line for next key
+0.14.7 (2017-04-18):
+ - round trip of integers (decimal, octal, hex, binary) now preserves
+ leading zero(s) padding and underscores. Underscores are presumed
+ to be at regular distances (e.g. ``0o12_345_67`` dumps back as
+ ``0o1_23_45_67``, because the distance from the last digit to the
+ underscore preceding it is the determining factor).
+
0.14.6 (2017-04-14):
- binary, octal and hex integers are now preserved by default. This
was a known deficiency. Working on this was prompted by the issue report (112)
diff --git a/__init__.py b/__init__.py
index 7963ae0..9982d17 100644
--- a/__init__.py
+++ b/__init__.py
@@ -10,8 +10,8 @@ from typing import Dict, Any # NOQA
_package_data = dict(
full_package_name='ruamel.yaml',
- version_info=(0, 14, 6),
- __version__='0.14.6',
+ version_info=(0, 14, 7),
+ __version__='0.14.7',
author='Anthon van der Neut',
author_email='a.van.der.neut@ruamel.eu',
description='ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order', # NOQA
diff --git a/_test/test_int.py b/_test/test_int.py
index 6776b51..241d14a 100644
--- a/_test/test_int.py
+++ b/_test/test_int.py
@@ -6,6 +6,8 @@ import pytest # NOQA
from roundtrip import round_trip, dedent, round_trip_load, round_trip_dump
+# http://yaml.org/type/int.html is where underscores in integers are defined
+
class TestBinHexOct:
# @pytest.mark.xfail(strict=True)
@@ -19,12 +21,13 @@ class TestBinHexOct:
""")
def test_calculate(self):
+ # make sure type, leading zero(s) and underscore are preserved
s = dedent("""\
- 42
- 0b101010
- - 0x2a
+ - 0x_2a
- 0x2A
- - 0o52
+ - 0o00_52
""")
x = round_trip_load(s)
for idx, elem in enumerate(x):
@@ -41,3 +44,57 @@ class TestBinHexOct:
elem //= t
x[idx] = elem
assert round_trip_dump(x) == s
+
+ # If a scalar int has one or more leading zeros, the width of the int is
+ # assumed to be significant, as there is little other reason to pad with zeros.
+ # Please note that none of this should work on YAML 1.1, as it collides with
+ # the old octal representation (a leading 0 means octal there).
+
+ def test_leading_zero_hex_oct_bin(self):
+ round_trip("""\
+ - 0b0101010
+ - 0b00101010
+ - 0x02a
+ - 0x002a
+ - 0x02A
+ - 0x002A
+ - 0o052
+ - 0o0052
+ """)
+
+ def test_leading_zero_int(self):
+ round_trip("""\
+ - 042
+ - 0042
+ """)
+
+ def test_leading_zero_YAML_1_1(self):
+ d = round_trip_load("""\
+ %YAML 1.1
+ ---
+ - 042
+ - 0o42
+ """)
+ assert d[0] == 0o42
+ assert d[1] == '0o42'
+
+ def test_underscore(self):
+ round_trip("""\
+ - 0b10000_10010010
+ - 0b0_0000_1001_0010
+ - 0x2_87_57_b2_
+ - 0x0287_57B2
+ - 0x_0_2_8_7_5_7_B_2
+ - 0o2416_53662
+ - 42_42_
+ """)
+
+ def test_leading_underscore(self):
+ d = round_trip_load("""\
+ - 0x_2_8_7_5_7_B_2
+ - _42_42_
+ - 42_42_
+ """)
+ assert d[0] == 42424242
+ assert d[1] == '_42_42_'
+ assert d[2] == 4242
diff --git a/constructor.py b/constructor.py
index f7baef1..a4c9331 100644
--- a/constructor.py
+++ b/constructor.py
@@ -24,7 +24,7 @@ from ruamel.yaml.comments import (CommentedMap, CommentedOrderedMap, CommentedSe
from ruamel.yaml.scalarstring import * # NOQA
from ruamel.yaml.scalarstring import (PreservedScalarString, SingleQuotedScalarString,
DoubleQuotedScalarString, ScalarString)
-from ruamel.yaml.scalarint import BinaryInt, OctalInt, HexInt, HexCapsInt
+from ruamel.yaml.scalarint import ScalarInt, BinaryInt, OctalInt, HexInt, HexCapsInt
from ruamel.yaml.timestamp import TimeStamp
__all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor',
@@ -910,8 +910,16 @@ class RoundTripConstructor(SafeConstructor):
def construct_yaml_int(self, node):
# type: (Any) -> Any
- value_s = to_str(self.construct_scalar(node))
- value_s = value_s.replace('_', '')
+ width = None # type: Any
+ value_su = to_str(self.construct_scalar(node))
+ try:
+ sx = value_su.rstrip('_')
+ underscore = [len(sx) - sx.rindex('_') - 1, False, False] # type: Any
+ except ValueError:
+ underscore = None
+ except IndexError:
+ underscore = None
+ value_s = value_su.replace('_', '')
sign = +1
if value_s[0] == '-':
sign = -1
@@ -920,9 +928,17 @@ class RoundTripConstructor(SafeConstructor):
if value_s == '0':
return 0
elif value_s.startswith('0b'):
- return BinaryInt(sign*int(value_s[2:], 2))
+ if self.resolver.processing_version > (1, 1) and value_s[2] == '0':
+ width = len(value_s[2:])
+ if underscore is not None:
+ underscore[1] = value_su[2] == '_'
+ underscore[2] = len(value_su[2:]) > 1 and value_su[-1] == '_'
+ return BinaryInt(sign*int(value_s[2:], 2), width=width,
+ underscore=underscore) # type: ignore
elif value_s.startswith('0x'):
# default to lower-case if no a-fA-F in string
+ if self.resolver.processing_version > (1, 1) and value_s[2] == '0':
+ width = len(value_s[2:])
hex_fun = HexInt # type: Any
for ch in value_s[2:]:
if ch in 'ABCDEF': # first non-digit is capital
@@ -930,9 +946,18 @@ class RoundTripConstructor(SafeConstructor):
break
if ch in 'abcdef':
break
- return hex_fun(sign*int(value_s[2:], 16))
+ if underscore is not None:
+ underscore[1] = value_su[2] == '_'
+ underscore[2] = len(value_su[2:]) > 1 and value_su[-1] == '_'
+ return hex_fun(sign*int(value_s[2:], 16), width=width, underscore=underscore)
elif value_s.startswith('0o'):
- return OctalInt(sign*int(value_s[2:], 8))
+ if self.resolver.processing_version > (1, 1) and value_s[2] == '0':
+ width = len(value_s[2:])
+ if underscore is not None:
+ underscore[1] = value_su[2] == '_'
+ underscore[2] = len(value_su[2:]) > 1 and value_su[-1] == '_'
+ return OctalInt(sign*int(value_s[2:], 8), width=width,
+ underscore=underscore) # type: ignore
elif self.resolver.processing_version != (1, 2) and value_s[0] == '0':
return sign*int(value_s, 8)
elif self.resolver.processing_version != (1, 2) and ':' in value_s:
@@ -944,6 +969,18 @@ class RoundTripConstructor(SafeConstructor):
value += digit*base
base *= 60
return sign*value
+ elif self.resolver.processing_version > (1, 1) and value_s[0] == '0':
+ # not an octal, an integer with leading zero(s)
+ if underscore is not None:
+ # cannot have a leading underscore
+ underscore[2] = len(value_su) > 1 and value_su[-1] == '_'
+ return ScalarInt(sign*int(value_s), width=len(value_s),
+ underscore=underscore) # type: ignore
+ elif underscore:
+ # cannot have a leading underscore
+ underscore[2] = len(value_su) > 1 and value_su[-1] == '_'
+ return ScalarInt(sign*int(value_s), width=None,
+ underscore=underscore) # type: ignore
else:
return sign*int(value_s)
diff --git a/representer.py b/representer.py
index 0c19a61..5e8ce51 100644
--- a/representer.py
+++ b/representer.py
@@ -1,16 +1,15 @@
# coding: utf-8
-from __future__ import absolute_import
-from __future__ import print_function
+from __future__ import print_function, absolute_import, division
-from typing import Dict, List, Any, Union # NOQA
+from typing import Dict, List, Any, Union, Text # NOQA
from ruamel.yaml.error import * # NOQA
from ruamel.yaml.nodes import * # NOQA
from ruamel.yaml.compat import text_type, binary_type, to_unicode, PY2, PY3, ordereddict
from ruamel.yaml.scalarstring import (PreservedScalarString, SingleQuotedScalarString,
DoubleQuotedScalarString)
-from ruamel.yaml.scalarint import BinaryInt, OctalInt, HexInt, HexCapsInt
+from ruamel.yaml.scalarint import ScalarInt, BinaryInt, OctalInt, HexInt, HexCapsInt
from ruamel.yaml.timestamp import TimeStamp
import datetime
@@ -678,21 +677,66 @@ class RoundTripRepresenter(SafeRepresenter):
tag = u'tag:yaml.org,2002:str'
return self.represent_scalar(tag, data, style=style)
+ def insert_underscore(self, prefix, s, underscore):
+ # type: (Any, Any, Any) -> Any
+ if underscore is None:
+ return self.represent_scalar(u'tag:yaml.org,2002:int', prefix + s)
+ if underscore[0]:
+ sl = list(s)
+ pos = len(s) - underscore[0]
+ while pos > 0:
+ sl.insert(pos, '_')
+ pos -= underscore[0]
+ s = ''.join(sl)
+ if underscore[1]:
+ s = '_' + s
+ if underscore[2]:
+ s += '_'
+ return self.represent_scalar(u'tag:yaml.org,2002:int', prefix + s)
+
+ def represent_scalar_int(self, data):
+ # type: (Any) -> Any
+ if data._width is not None:
+ s = '{:0{}d}'.format(data, data._width)
+ else:
+ s = format(data, 'd')
+ return self.insert_underscore('', s, data._underscore)
+
def represent_binary_int(self, data):
# type: (Any) -> Any
- return self.represent_scalar(u'tag:yaml.org,2002:int', '0b' + format(data, 'b'))
+ if data._width is not None:
+ # cannot use '{:#0{}b}', that strips the zeros
+ s = '{:0{}b}'.format(data, data._width)
+ else:
+ s = format(data, 'b')
+ return self.insert_underscore('0b', s, data._underscore)
def represent_octal_int(self, data):
# type: (Any) -> Any
- return self.represent_scalar(u'tag:yaml.org,2002:int', '0o' + format(data, 'o'))
+ if data._width is not None:
+ # cannot use '{:#0{}o}', that strips the zeros
+ s = '{:0{}o}'.format(data, data._width)
+ else:
+ s = format(data, 'o')
+ return self.insert_underscore('0o', s, data._underscore)
def represent_hex_int(self, data):
# type: (Any) -> Any
- return self.represent_scalar(u'tag:yaml.org,2002:int', '0x' + format(data, 'x'))
+ if data._width is not None:
+ # cannot use '{:#0{}x}', that strips the zeros
+ s = '{:0{}x}'.format(data, data._width)
+ else:
+ s = format(data, 'x')
+ return self.insert_underscore('0x', s, data._underscore)
def represent_hex_caps_int(self, data):
# type: (Any) -> Any
- return self.represent_scalar(u'tag:yaml.org,2002:int', '0x' + format(data, 'X'))
+ if data._width is not None:
+ # cannot use '{:#0{}X}', that strips the zeros
+ s = '{:0{}X}'.format(data, data._width)
+ else:
+ s = format(data, 'X')
+ return self.insert_underscore('0x', s, data._underscore)
def represent_sequence(self, tag, sequence, flow_style=None):
# type: (Any, Any, Any) -> Any
@@ -971,6 +1015,10 @@ RoundTripRepresenter.add_representer(
RoundTripRepresenter.represent_double_quoted_scalarstring)
RoundTripRepresenter.add_representer(
+ ScalarInt,
+ RoundTripRepresenter.represent_scalar_int)
+
+RoundTripRepresenter.add_representer(
BinaryInt,
RoundTripRepresenter.represent_binary_int)
diff --git a/scalarint.py b/scalarint.py
index e2028e3..48f1962 100644
--- a/scalarint.py
+++ b/scalarint.py
@@ -11,47 +11,61 @@ __all__ = ["ScalarInt", "BinaryInt", "OctalInt", "HexInt", "HexCapsInt"]
class ScalarInt(int):
- __slots__ = ()
-
def __new__(cls, *args, **kw):
- # type: (Any, Any) -> Any
- return int.__new__(cls, *args, **kw) # type: ignore
+ # type: (Any, Any, Any) -> Any
+ width = kw.pop('width', None) # type: ignore
+ underscore = kw.pop('underscore', None) # type: ignore
+ v = int.__new__(cls, *args, **kw) # type: ignore
+ v._width = width
+ v._underscore = underscore
+ return v
def __iadd__(self, a): # type: ignore
# type: (Any) -> Any
- return type(self)(self + a)
+ x = type(self)(self + a)
+ x._width = self._width
+ x._underscore = self._underscore[:] if self._underscore is not None else None
+ return x
def __ifloordiv__(self, a): # type: ignore
# type: (Any) -> Any
- return type(self)(self // a)
+ x = type(self)(self // a)
+ x._width = self._width
+ x._underscore = self._underscore[:] if self._underscore is not None else None
+ return x
def __imul__(self, a): # type: ignore
# type: (Any) -> Any
- return type(self)(self * a)
+ x = type(self)(self * a)
+ x._width = self._width
+ x._underscore = self._underscore[:] if self._underscore is not None else None
+ return x
def __ipow__(self, a): # type: ignore
# type: (Any) -> Any
- return type(self)(self ** a)
+ x = type(self)(self ** a)
+ x._width = self._width
+ x._underscore = self._underscore[:] if self._underscore is not None else None
+ return x
def __isub__(self, a): # type: ignore
# type: (Any) -> Any
- return type(self)(self - a)
+ x = type(self)(self - a)
+ x._width = self._width
+ x._underscore = self._underscore[:] if self._underscore is not None else None
+ return x
class BinaryInt(ScalarInt):
- __slots__ = ()
-
- def __new__(cls, value):
- # type: (Text) -> Any
- return ScalarInt.__new__(cls, value)
+ def __new__(cls, value, width=None, underscore=None):
+ # type: (Any, Any, Any) -> Any
+ return ScalarInt.__new__(cls, value, width=width, underscore=underscore)
class OctalInt(ScalarInt):
- __slots__ = ()
-
- def __new__(cls, value):
- # type: (Text) -> Any
- return ScalarInt.__new__(cls, value)
+ def __new__(cls, value, width=None, underscore=None):
+ # type: (Any, Any, Any) -> Any
+ return ScalarInt.__new__(cls, value, width=width, underscore=underscore)
# mixed casing of A-F is not supported, when loading the first non digit
@@ -59,17 +73,13 @@ class OctalInt(ScalarInt):
class HexInt(ScalarInt):
"""uses lower case (a-f)"""
- __slots__ = ()
-
- def __new__(cls, value):
- # type: (Text) -> Any
- return ScalarInt.__new__(cls, value)
+ def __new__(cls, value, width=None, underscore=None):
+ # type: (Any, Any, Any) -> Any
+ return ScalarInt.__new__(cls, value, width=width, underscore=underscore)
class HexCapsInt(ScalarInt):
"""uses upper case (A-F)"""
- __slots__ = ()
-
- def __new__(cls, value):
- # type: (Text) -> Any
- return ScalarInt.__new__(cls, value)
+ def __new__(cls, value, width=None, underscore=None):
+ # type: (Any, Any, Any) -> Any
+ return ScalarInt.__new__(cls, value, width=width, underscore=underscore)