summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Anthon van der Neut <anthon@mnt.org> 2018-06-16 23:41:43 +0200
committer: Anthon van der Neut <anthon@mnt.org> 2018-06-16 23:41:43 +0200
commit: 15008733ed560cb506871f1c4fac99c99e53bfe6 (patch)
tree: a30c9db2939a72720bac131b8b2075c9d72be500
parent: 6805e6c06a7255f737d13efc9e1002ff6f254039 (diff)
parent: 13cd9b56e285a0ddf4c64e4c5347485bd9ec6546 (diff)
download: ruamel.yaml-15008733ed560cb506871f1c4fac99c99e53bfe6.tar.gz
merge PR27 improving startup time (tag: 0.15.39)
-rw-r--r-- CHANGES 5
-rw-r--r-- README.rst 5
-rw-r--r-- __init__.py 4
-rw-r--r-- constructor.py 3
-rw-r--r-- reader.py 53
-rw-r--r-- resolver.py 39
-rw-r--r-- serializer.py 5
-rw-r--r-- util.py 33
8 files changed, 111 insertions, 36 deletions
diff --git a/CHANGES b/CHANGES
index 4973654..2111c36 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,8 @@
+[0, 15, 39]: 2018-06-16
+ - merge PR27 improving package startup time (and loading when regexp not
+ actually used), provided by
+ `Marcel Bargull <https://bitbucket.org/mbargull/>`__
+
[0, 15, 37]: 2018-06-13
- fix for losing precision when roundtripping floats by
`Rolf Wojtech <https://bitbucket.org/asomov/>`__
diff --git a/README.rst b/README.rst
index 92c574c..6d787c6 100644
--- a/README.rst
+++ b/README.rst
@@ -35,6 +35,11 @@ ChangeLog
.. should insert NEXT: at the beginning of line for next key (with empty line)
+0.15.39 (2018-06-16):
+ - merge PR27 improving package startup time (and loading when regexp not
+ actually used), provided by
+ `Marcel Bargull <https://bitbucket.org/mbargull/>`__
+
0.15.38 (2018-06-13):
- fix for losing precision when roundtripping floats by
`Rolf Wojtech <https://bitbucket.org/asomov/>`__
diff --git a/__init__.py b/__init__.py
index 79f46f3..0474160 100644
--- a/__init__.py
+++ b/__init__.py
@@ -7,8 +7,8 @@ if False: # MYPY
_package_data = dict(
full_package_name='ruamel.yaml',
- version_info=(0, 15, 38),
- __version__='0.15.38',
+ version_info=(0, 15, 39),
+ __version__='0.15.39',
author='Anthon van der Neut',
author_email='a.van.der.neut@ruamel.eu',
description='ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order', # NOQA
diff --git a/constructor.py b/constructor.py
index 9706bbe..7769874 100644
--- a/constructor.py
+++ b/constructor.py
@@ -26,6 +26,7 @@ from ruamel.yaml.scalarstring import (PreservedScalarString, SingleQuotedScalarS
from ruamel.yaml.scalarint import ScalarInt, BinaryInt, OctalInt, HexInt, HexCapsInt
from ruamel.yaml.scalarfloat import ScalarFloat
from ruamel.yaml.timestamp import TimeStamp
+from ruamel.yaml.util import RegExp
if False: # MYPY
from typing import Any, Dict, List, Set, Generator # NOQA
@@ -471,7 +472,7 @@ class SafeConstructor(BaseConstructor):
None, None,
"failed to decode base64 data: %s" % exc, node.start_mark)
- timestamp_regexp = re.compile(
+ timestamp_regexp = RegExp(
u'''^(?P<year>[0-9][0-9][0-9][0-9])
-(?P<month>[0-9][0-9]?)
-(?P<day>[0-9][0-9]?)
diff --git a/reader.py b/reader.py
index 9ad2b5f..c454925 100644
--- a/reader.py
+++ b/reader.py
@@ -25,9 +25,10 @@ import re
from ruamel.yaml.error import YAMLError, FileMark, StringMark, YAMLStreamError
from ruamel.yaml.compat import text_type, binary_type, PY3
+from ruamel.yaml.util import RegExp
if False: # MYPY
- from typing import Any, Dict, Optional, List, Union, Text # NOQA
+ from typing import Any, Dict, Optional, List, Union, Text, Tuple # NOQA
from ruamel.yaml.compat import StreamTextType # NOQA
__all__ = ['Reader', 'ReaderError']
@@ -181,29 +182,59 @@ class Reader(object):
# 4 if 32 bit unicode supported, 2 e.g. on MacOS (issue 56)
try:
- NON_PRINTABLE = re.compile(
+ re.compile(u'[^\U00010000]')
+ except:
+ NON_PRINTABLE = RegExp(
u'[^\x09\x0A\x0D\x20-\x7E\x85'
u'\xA0-\uD7FF'
u'\uE000-\uFFFD'
- u'\U00010000-\U0010FFFF'
u']'
)
- UNICODE_SIZE = 4
- except:
- NON_PRINTABLE = re.compile(
+ UNICODE_SIZE = 2
+ else:
+ NON_PRINTABLE = RegExp(
u'[^\x09\x0A\x0D\x20-\x7E\x85'
u'\xA0-\uD7FF'
u'\uE000-\uFFFD'
+ u'\U00010000-\U0010FFFF'
u']'
)
- UNICODE_SIZE = 2
+ UNICODE_SIZE = 4
+
+ _printable_ascii = ('\x09\x0A\x0D' + ''.join(map(chr, range(0x20, 0x7F)))).encode('ascii')
+
+ @classmethod
+ def _get_non_printable_ascii(cls, data):
+ # type: (Text) -> Union[None, Tuple[int, Text]]
+ ascii_bytes = data.encode('ascii')
+ non_printables = ascii_bytes.translate(None, cls._printable_ascii)
+ if not non_printables:
+ return None
+ non_printable = non_printables[:1]
+ return ascii_bytes.index(non_printable), non_printable.decode('ascii')
+
+ @classmethod
+ def _get_non_printable_regex(cls, data):
+ # type: (Text) -> Union[None, Tuple[int, Text]]
+ match = cls.NON_PRINTABLE.search(data)
+ if not bool(match):
+ return None
+ return match.start(), match.group()
+
+ @classmethod
+ def _get_non_printable(cls, data):
+ # type: (Text) -> Union[None, Tuple[int, Text]]
+ try:
+ return cls._get_non_printable_ascii(data)
+ except UnicodeEncodeError:
+ return cls._get_non_printable_regex(data)
def check_printable(self, data):
# type: (Any) -> None
- match = self.NON_PRINTABLE.search(data)
- if bool(match):
- character = match.group()
- position = self.index + (len(self.buffer) - self.pointer) + match.start()
+ non_printable_match = self._get_non_printable(data)
+ if non_printable_match is not None:
+ start, character = non_printable_match
+ position = self.index + (len(self.buffer) - self.pointer) + start
raise ReaderError(self.name, position, ord(character),
'unicode', "special characters are not allowed")
diff --git a/resolver.py b/resolver.py
index 85e640d..9e5d320 100644
--- a/resolver.py
+++ b/resolver.py
@@ -11,6 +11,7 @@ if False: # MYPY
from ruamel.yaml.compat import string_types, _DEFAULT_YAML_VERSION # NOQA
from ruamel.yaml.error import * # NOQA
from ruamel.yaml.nodes import * # NOQA
+from ruamel.yaml.util import RegExp # NOQA
__all__ = ['BaseResolver', 'Resolver', 'VersionedResolver']
@@ -23,17 +24,17 @@ __all__ = ['BaseResolver', 'Resolver', 'VersionedResolver']
implicit_resolvers = [
([(1, 2)],
u'tag:yaml.org,2002:bool',
- re.compile(u'''^(?:true|True|TRUE|false|False|FALSE)$''', re.X),
+ RegExp(u'''^(?:true|True|TRUE|false|False|FALSE)$''', re.X),
list(u'tTfF')),
([(1, 1)],
u'tag:yaml.org,2002:bool',
- re.compile(u'''^(?:yes|Yes|YES|no|No|NO
+ RegExp(u'''^(?:yes|Yes|YES|no|No|NO
|true|True|TRUE|false|False|FALSE
|on|On|ON|off|Off|OFF)$''', re.X),
list(u'yYnNtTfFoO')),
([(1, 2)],
u'tag:yaml.org,2002:float',
- re.compile(u'''^(?:
+ RegExp(u'''^(?:
[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
|[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
|\\.[0-9_]+(?:[eE][-+][0-9]+)?
@@ -42,7 +43,7 @@ implicit_resolvers = [
list(u'-+0123456789.')),
([(1, 1)],
u'tag:yaml.org,2002:float',
- re.compile(u'''^(?:
+ RegExp(u'''^(?:
[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
|[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
|\\.[0-9_]+(?:[eE][-+][0-9]+)?
@@ -52,14 +53,14 @@ implicit_resolvers = [
list(u'-+0123456789.')),
([(1, 2)],
u'tag:yaml.org,2002:int',
- re.compile(u'''^(?:[-+]?0b[0-1_]+
+ RegExp(u'''^(?:[-+]?0b[0-1_]+
|[-+]?0o?[0-7_]+
|[-+]?(?:0|[1-9][0-9_]*)
|[-+]?0x[0-9a-fA-F_]+)$''', re.X),
list(u'-+0123456789')),
([(1, 1)],
u'tag:yaml.org,2002:int',
- re.compile(u'''^(?:[-+]?0b[0-1_]+
+ RegExp(u'''^(?:[-+]?0b[0-1_]+
|[-+]?0?[0-7_]+
|[-+]?(?:0|[1-9][0-9_]*)
|[-+]?0x[0-9a-fA-F_]+
@@ -67,17 +68,17 @@ implicit_resolvers = [
list(u'-+0123456789')),
([(1, 2), (1, 1)],
u'tag:yaml.org,2002:merge',
- re.compile(u'^(?:<<)$'),
+ RegExp(u'^(?:<<)$'),
[u'<']),
([(1, 2), (1, 1)],
u'tag:yaml.org,2002:null',
- re.compile(u'''^(?: ~
+ RegExp(u'''^(?: ~
|null|Null|NULL
| )$''', re.X),
[u'~', u'n', u'N', u'']),
([(1, 2), (1, 1)],
u'tag:yaml.org,2002:timestamp',
- re.compile(u'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]
+ RegExp(u'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]
|[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]?
(?:[Tt]|[ \\t]+)[0-9][0-9]?
:[0-9][0-9] :[0-9][0-9] (?:\\.[0-9]*)?
@@ -85,13 +86,13 @@ implicit_resolvers = [
list(u'0123456789')),
([(1, 2), (1, 1)],
u'tag:yaml.org,2002:value',
- re.compile(u'^(?:=)$'),
+ RegExp(u'^(?:=)$'),
[u'=']),
# The following resolver is only for documentation purposes. It cannot work
# because plain scalars cannot start with '!', '&', or '*'.
([(1, 2), (1, 1)],
u'tag:yaml.org,2002:yaml',
- re.compile(u'^(?:!|&|\\*)$'),
+ RegExp(u'^(?:!|&|\\*)$'),
list(u'!&*')),
]
@@ -308,14 +309,14 @@ class Resolver(BaseResolver):
Resolver.add_implicit_resolver_base(
u'tag:yaml.org,2002:bool',
- re.compile(u'''^(?:yes|Yes|YES|no|No|NO
+ RegExp(u'''^(?:yes|Yes|YES|no|No|NO
|true|True|TRUE|false|False|FALSE
|on|On|ON|off|Off|OFF)$''', re.X),
list(u'yYnNtTfFoO'))
Resolver.add_implicit_resolver_base(
u'tag:yaml.org,2002:float',
- re.compile(u'''^(?:
+ RegExp(u'''^(?:
[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
|[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
|\\.[0-9_]+(?:[eE][-+][0-9]+)?
@@ -326,7 +327,7 @@ Resolver.add_implicit_resolver_base(
Resolver.add_implicit_resolver_base(
u'tag:yaml.org,2002:int',
- re.compile(u'''^(?:[-+]?0b[0-1_]+
+ RegExp(u'''^(?:[-+]?0b[0-1_]+
|[-+]?0o?[0-7_]+
|[-+]?(?:0|[1-9][0-9_]*)
|[-+]?0x[0-9a-fA-F_]+
@@ -335,19 +336,19 @@ Resolver.add_implicit_resolver_base(
Resolver.add_implicit_resolver_base(
u'tag:yaml.org,2002:merge',
- re.compile(u'^(?:<<)$'),
+ RegExp(u'^(?:<<)$'),
[u'<'])
Resolver.add_implicit_resolver_base(
u'tag:yaml.org,2002:null',
- re.compile(u'''^(?: ~
+ RegExp(u'''^(?: ~
|null|Null|NULL
| )$''', re.X),
[u'~', u'n', u'N', u''])
Resolver.add_implicit_resolver_base(
u'tag:yaml.org,2002:timestamp',
- re.compile(u'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]
+ RegExp(u'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]
|[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]?
(?:[Tt]|[ \\t]+)[0-9][0-9]?
:[0-9][0-9] :[0-9][0-9] (?:\\.[0-9]*)?
@@ -356,14 +357,14 @@ Resolver.add_implicit_resolver_base(
Resolver.add_implicit_resolver_base(
u'tag:yaml.org,2002:value',
- re.compile(u'^(?:=)$'),
+ RegExp(u'^(?:=)$'),
[u'='])
# The following resolver is only for documentation purposes. It cannot work
# because plain scalars cannot start with '!', '&', or '*'.
Resolver.add_implicit_resolver_base(
u'tag:yaml.org,2002:yaml',
- re.compile(u'^(?:!|&|\\*)$'),
+ RegExp(u'^(?:!|&|\\*)$'),
list(u'!&*'))
diff --git a/serializer.py b/serializer.py
index 46884b5..d6012db 100644
--- a/serializer.py
+++ b/serializer.py
@@ -2,10 +2,9 @@
from __future__ import absolute_import
-import re
-
from ruamel.yaml.error import YAMLError
from ruamel.yaml.compat import nprint, DBG_NODE, dbg, string_types
+from ruamel.yaml.util import RegExp
from ruamel.yaml.events import (
StreamStartEvent, StreamEndEvent, MappingStartEvent, MappingEndEvent,
@@ -31,7 +30,7 @@ class Serializer(object):
# 'id' and 3+ numbers, but not 000
ANCHOR_TEMPLATE = u'id%03d'
- ANCHOR_RE = re.compile(u'id(?!000$)\\d{3,}')
+ ANCHOR_RE = RegExp(u'id(?!000$)\\d{3,}')
def __init__(self, encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, dumper=None):
diff --git a/util.py b/util.py
index 9f939cf..e5e816f 100644
--- a/util.py
+++ b/util.py
@@ -6,6 +6,9 @@ some helper functions that might be generally useful
from __future__ import absolute_import, print_function
+from functools import partial
+import re
+
from .compat import text_type, binary_type
if False: # MYPY
@@ -13,6 +16,36 @@ if False: # MYPY
from .compat import StreamTextType # NOQA
+class LazyEval(object):
+ """
+ Lightweight wrapper around lazily evaluated func(*args, **kwargs).
+
+ func is only evaluated when any attribute of its return value is accessed.
+ Every attribute access is passed through to the wrapped value.
+ (This only excludes special cases like method-wrappers, e.g., __hash__.)
+ The sole additional attribute is the lazy_self function which holds the
+ return value (or, prior to evaluation, func and arguments), in its closure.
+ """
+ def __init__(self, func, *args, **kwargs):
+ def lazy_self():
+ return_value = func(*args, **kwargs)
+ object.__setattr__(self, "lazy_self", lambda: return_value)
+ return return_value
+ object.__setattr__(self, "lazy_self", lazy_self)
+
+ def __getattribute__(self, name):
+ lazy_self = object.__getattribute__(self, "lazy_self")
+ if name == "lazy_self":
+ return lazy_self
+ return getattr(lazy_self(), name)
+
+ def __setattr__(self, name, value):
+ setattr(self.lazy_self(), name, value)
+
+
+RegExp = partial(LazyEval, re.compile)
+
+
# originally as comment
# https://github.com/pre-commit/pre-commit/pull/211#issuecomment-186466605
# if you use this in your code, I suggest adding a test in your test suite