1 file changed, 122 insertions, 102 deletions
diff --git a/logilab/common/textutils.py b/logilab/common/textutils.py
index 4b6ea98..b988c7a 100644
--- a/logilab/common/textutils.py
+++ b/logilab/common/textutils.py
@@ -50,33 +50,37 @@ from re import Pattern, Match
 from warnings import warn
 from unicodedata import normalize as _uninormalize
 from typing import Any, Optional, Tuple, List, Callable, Dict, Union
+
 try:
     from os import linesep
 except ImportError:
-    linesep = '\n' # gae
+    linesep = "\n"  # gae
 
 from logilab.common.deprecation import deprecated
 
 MANUAL_UNICODE_MAP = {
-    u'\xa1': u'!',    # INVERTED EXCLAMATION MARK
-    u'\u0142': u'l',  # LATIN SMALL LETTER L WITH STROKE
-    u'\u2044': u'/',  # FRACTION SLASH
-    u'\xc6': u'AE',   # LATIN CAPITAL LETTER AE
-    u'\xa9': u'(c)',  # COPYRIGHT SIGN
-    u'\xab': u'"',    # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xe6': u'ae',   # LATIN SMALL LETTER AE
-    u'\xae': u'(r)',  # REGISTERED SIGN
-    u'\u0153': u'oe', # LATIN SMALL LIGATURE OE
-    u'\u0152': u'OE', # LATIN CAPITAL LIGATURE OE
-    u'\xd8': u'O',    # LATIN CAPITAL LETTER O WITH STROKE
-    u'\xf8': u'o',    # LATIN SMALL LETTER O WITH STROKE
-    u'\xbb': u'"',    # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xdf': u'ss',   # LATIN SMALL LETTER SHARP S
-    u'\u2013': u'-',  # HYPHEN
-    u'\u2019': u"'",  # SIMPLE QUOTE
-    }
-
-def unormalize(ustring: str, ignorenonascii: Optional[Any] = None, substitute: Optional[str] = None) -> str:
+    "\xa1": "!",  # INVERTED EXCLAMATION MARK
+    "\u0142": "l",  # LATIN SMALL LETTER L WITH STROKE
+    "\u2044": "/",  # FRACTION SLASH
+    "\xc6": "AE",  # LATIN CAPITAL LETTER AE
+    "\xa9": "(c)",  # COPYRIGHT SIGN
+    "\xab": '"',  # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    "\xe6": "ae",  # LATIN SMALL LETTER AE
+    "\xae": "(r)",  # REGISTERED SIGN
+    "\u0153": "oe",  # LATIN SMALL LIGATURE OE
+    "\u0152": "OE",  # LATIN CAPITAL LIGATURE OE
+    "\xd8": "O",  # LATIN CAPITAL LETTER O WITH STROKE
+    "\xf8": "o",  # LATIN SMALL LETTER O WITH STROKE
+    "\xbb": '"',  # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    "\xdf": "ss",  # LATIN SMALL LETTER SHARP S
+    "\u2013": "-",  # HYPHEN
+    "\u2019": "'",  # SIMPLE QUOTE
+}
+
+
+def unormalize(
+    ustring: str, ignorenonascii: Optional[Any] = None, substitute: Optional[str] = None
+) -> str:
     """replace diacritical characters with their corresponding ascii characters
 
     Convert the unicode string to its long normalized form (unicode character
@@ -92,22 +96,26 @@ def unormalize(ustring: str, ignorenonascii: Optional[Any] = None, substitute: O
     """
     # backward compatibility, ignorenonascii was a boolean
     if ignorenonascii is not None:
-        warn("ignorenonascii is deprecated, use substitute named parameter instead",
-             DeprecationWarning, stacklevel=2)
+        warn(
+            "ignorenonascii is deprecated, use substitute named parameter instead",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         if ignorenonascii:
-            substitute = ''
+            substitute = ""
     res = []
     for letter in ustring[:]:
         try:
             replacement = MANUAL_UNICODE_MAP[letter]
         except KeyError:
-            replacement = _uninormalize('NFKD', letter)[0]
+            replacement = _uninormalize("NFKD", letter)[0]
             if ord(replacement) >= 2 ** 7:
                 if substitute is None:
                     raise ValueError("can't deal with non-ascii based characters")
                 replacement = substitute
         res.append(replacement)
-    return u''.join(res)
+    return "".join(res)
+
 
 def unquote(string: str) -> str:
     """remove optional quotes (simple or double) from the string
@@ -120,17 +128,18 @@ def unquote(string: str) -> str:
     """
     if not string:
         return string
-    if string[0] in '"\'':
+    if string[0] in "\"'":
         string = string[1:]
-    if string[-1] in '"\'':
+    if string[-1] in "\"'":
         string = string[:-1]
     return string
 
 
-_BLANKLINES_RGX = re.compile('\r?\n\r?\n')
-_NORM_SPACES_RGX = re.compile('\s+')
+_BLANKLINES_RGX = re.compile("\r?\n\r?\n")
+_NORM_SPACES_RGX = re.compile(r"\s+")  # raw string: "\s" is not a valid str escape
+
 
-def normalize_text(text: str, line_len: int = 80, indent: str = '', rest: bool = False) -> str:
+def normalize_text(text: str, line_len: int = 80, indent: str = "", rest: bool = False) -> str:
     """normalize a text to display it with a maximum line size and
     optionally arbitrary indentation. Line jumps are normalized but blank
     lines are kept. The indentation string may be used to insert a
@@ -158,10 +167,10 @@ def normalize_text(text: str, line_len: int = 80, indent: str = '', rest: bool =
     result = []
     for text in _BLANKLINES_RGX.split(text):
         result.append(normp(text, line_len, indent))
-    return ('%s%s%s' % (linesep, indent, linesep)).join(result)
+    return ("%s%s%s" % (linesep, indent, linesep)).join(result)
 
 
-def normalize_paragraph(text: str, line_len: int = 80, indent: str = '') -> str:
+def normalize_paragraph(text: str, line_len: int = 80, indent: str = "") -> str:
     """normalize a text to display it with a maximum line size and
     optionally arbitrary indentation. Line jumps are normalized. The
     indentation string may be used top insert a comment mark for
@@ -182,7 +191,7 @@ def normalize_paragraph(text: str, line_len: int = 80, indent: str = '') -> str:
       inferior to `line_len`, and optionally prefixed by an
       indentation string
     """
-    text = _NORM_SPACES_RGX.sub(' ', text)
+    text = _NORM_SPACES_RGX.sub(" ", text)
     line_len = line_len - len(indent)
     lines = []
     while text:
@@ -190,7 +199,8 @@ def normalize_paragraph(text: str, line_len: int = 80, indent: str = '') -> str:
         lines.append(indent + aline)
     return linesep.join(lines)
 
-def normalize_rest_paragraph(text: str, line_len: int = 80, indent: str = '') -> str:
+
+def normalize_rest_paragraph(text: str, line_len: int = 80, indent: str = "") -> str:
     """normalize a ReST text to display it with a maximum line size and
     optionally arbitrary indentation. Line jumps are normalized. The
     indentation string may be used top insert a comment mark for
@@ -211,21 +221,21 @@ def normalize_rest_paragraph(text: str, line_len: int = 80, indent: str = '') ->
       inferior to `line_len`, and optionally prefixed by an
       indentation string
     """
-    toreport = ''
+    toreport = ""
     lines = []
     line_len = line_len - len(indent)
     for line in text.splitlines():
-        line = toreport + _NORM_SPACES_RGX.sub(' ', line.strip())
-        toreport = ''
+        line = toreport + _NORM_SPACES_RGX.sub(" ", line.strip())
+        toreport = ""
         while len(line) > line_len:
             # too long line, need split
             line, toreport = splittext(line, line_len)
             lines.append(indent + line)
             if toreport:
-                line = toreport + ' '
-                toreport = ''
+                line = toreport + " "
+                toreport = ""
             else:
-                line = ''
+                line = ""
         if line:
             lines.append(indent + line.strip())
     return linesep.join(lines)
@@ -239,18 +249,18 @@ def splittext(text: str, line_len: int) -> Tuple[str, str]:
     * the rest of the text which has to be reported on another line
     """
     if len(text) <= line_len:
-        return text, ''
-    pos = min(len(text)-1, line_len)
-    while pos > 0 and text[pos] != ' ':
+        return text, ""
+    pos = min(len(text) - 1, line_len)
+    while pos > 0 and text[pos] != " ":
         pos -= 1
     if pos == 0:
         pos = min(len(text), line_len)
-        while len(text) > pos and text[pos] != ' ':
+        while len(text) > pos and text[pos] != " ":
             pos += 1
-    return text[:pos], text[pos+1:].strip()
+    return text[:pos], text[pos + 1 :].strip()
 
 
-def splitstrip(string: str, sep: str = ',') -> List[str]:
+def splitstrip(string: str, sep: str = ",") -> List[str]:
     """return a list of stripped string by splitting the string given as
     argument on `sep` (',' by default). Empty string are discarded.
 
@@ -271,15 +281,16 @@ def splitstrip(string: str, sep: str = ',') -> List[str]:
     """
     return [word.strip() for word in string.split(sep) if word.strip()]
 
-get_csv = deprecated('get_csv is deprecated, use splitstrip')(splitstrip)
+
+get_csv = deprecated("get_csv is deprecated, use splitstrip")(splitstrip)
 
 
 def split_url_or_path(url_or_path):
     """return the latest component of a string containing either an url of the
     form <scheme>://<path> or a local file system path
     """
-    if '://' in url_or_path:
-        return url_or_path.rstrip('/').rsplit('/', 1)
+    if "://" in url_or_path:
+        return url_or_path.rstrip("/").rsplit("/", 1)
     return osp.split(url_or_path.rstrip(osp.sep))
 
 
@@ -303,8 +314,8 @@ def text_to_dict(text):
         return res
     for line in text.splitlines():
         line = line.strip()
-        if line and not line.startswith('#'):
-            key, value = [w.strip() for w in line.split('=', 1)]
+        if line and not line.startswith("#"):
+            key, value = [w.strip() for w in line.split("=", 1)]
             if key in res:
                 try:
                     res[key].append(value)
@@ -315,13 +326,12 @@ def text_to_dict(text):
     return res
 
 
-_BLANK_URE = r'(\s|,)+'
+_BLANK_URE = r"(\s|,)+"
 _BLANK_RE = re.compile(_BLANK_URE)
-__VALUE_URE = r'-?(([0-9]+\.[0-9]*)|((0x?)?[0-9]+))'
-__UNITS_URE = r'[a-zA-Z]+'
-_VALUE_RE = re.compile(r'(?P<value>%s)(?P<unit>%s)?'%(__VALUE_URE, __UNITS_URE))
-_VALIDATION_RE = re.compile(r'^((%s)(%s))*(%s)?$' % (__VALUE_URE, __UNITS_URE,
-                                                    __VALUE_URE))
+__VALUE_URE = r"-?(([0-9]+\.[0-9]*)|((0x?)?[0-9]+))"
+__UNITS_URE = r"[a-zA-Z]+"
+_VALUE_RE = re.compile(r"(?P<value>%s)(?P<unit>%s)?" % (__VALUE_URE, __UNITS_URE))
+_VALIDATION_RE = re.compile(r"^((%s)(%s))*(%s)?$" % (__VALUE_URE, __UNITS_URE, __VALUE_URE))
 
 BYTE_UNITS = {
     "b": 1,
@@ -336,11 +346,18 @@ TIME_UNITS = {
     "s": 1,
     "min": 60,
     "h": 60 * 60,
-    "d": 60 * 60 *24,
+    "d": 60 * 60 * 24,
 }
 
-def apply_units(string: str, units: Dict[str, int], inter: Union[Callable, None, type] = None, final: type = float, blank_reg: Pattern = _BLANK_RE,
-                value_reg: Pattern = _VALUE_RE) -> Union[float, int]:
+
+def apply_units(
+    string: str,
+    units: Dict[str, int],
+    inter: Union[Callable, None, type] = None,
+    final: type = float,
+    blank_reg: Pattern = _BLANK_RE,
+    value_reg: Pattern = _VALUE_RE,
+) -> Union[float, int]:
     """Parse the string applying the units defined in units
     (e.g.: "1.5m",{'m',60} -> 80).
 
@@ -361,7 +378,7 @@ def apply_units(string: str, units: Dict[str, int], inter: Union[Callable, None,
     """
     if inter is None:
         inter = final
-    fstring = _BLANK_RE.sub('', string)
+    fstring = _BLANK_RE.sub("", string)
     if not (fstring and _VALIDATION_RE.match(fstring)):
         raise ValueError("Invalid unit string: %r." % string)
     values = []
@@ -373,15 +390,15 @@ def apply_units(string: str, units: Dict[str, int], inter: Union[Callable, None,
             try:
                 value *= units[unit.lower()]
             except KeyError:
-                raise ValueError('invalid unit %s. valid units are %s' %
-                                 (unit, list(units.keys())))
+                raise ValueError("invalid unit %s. valid units are %s" % (unit, list(units.keys())))
         values.append(value)
     return final(sum(values))
 
 
-_LINE_RGX = re.compile('\r\n|\r+|\n')
+_LINE_RGX = re.compile("\r\n|\r+|\n")
+
 
-def pretty_match(match: Match, string: str, underline_char: str = '^') -> str:
+def pretty_match(match: Match, string: str, underline_char: str = "^") -> str:
     """return a string with the match location underlined:
 
     >>> import re
@@ -419,7 +436,7 @@ def pretty_match(match: Match, string: str, underline_char: str = '^') -> str:
         result = [string[:start_line_pos]]
         start_line_pos += len(linesep)
     offset = start - start_line_pos
-    underline = ' ' * offset + underline_char * (end - start)
+    underline = " " * offset + underline_char * (end - start)
     end_line_pos = string.find(linesep, end)
     if end_line_pos == -1:
         string = string[start_line_pos:]
@@ -429,7 +446,7 @@ def pretty_match(match: Match, string: str, underline_char: str = '^') -> str:
         # mypy: Incompatible types in assignment (expression has type "str",
         # mypy: variable has type "int")
         # but it's a str :|
-        end = string[end_line_pos + len(linesep):]  # type: ignore
+        end = string[end_line_pos + len(linesep) :]  # type: ignore
         string = string[start_line_pos:end_line_pos]
         result.append(string)
         result.append(underline)
@@ -439,30 +456,31 @@ def pretty_match(match: Match, string: str, underline_char: str = '^') -> str:
 
 # Ansi colorization ###########################################################
 
-ANSI_PREFIX = '\033['
-ANSI_END = 'm'
-ANSI_RESET = '\033[0m'
+ANSI_PREFIX = "\033["
+ANSI_END = "m"
+ANSI_RESET = "\033[0m"
 ANSI_STYLES = {
-    'reset': "0",
-    'bold': "1",
-    'italic': "3",
-    'underline': "4",
-    'blink': "5",
-    'inverse': "7",
-    'strike': "9",
+    "reset": "0",
+    "bold": "1",
+    "italic": "3",
+    "underline": "4",
+    "blink": "5",
+    "inverse": "7",
+    "strike": "9",
 }
 ANSI_COLORS = {
-    'reset': "0",
-    'black': "30",
-    'red': "31",
-    'green': "32",
-    'yellow': "33",
-    'blue': "34",
-    'magenta': "35",
-    'cyan': "36",
-    'white': "37",
+    "reset": "0",
+    "black": "30",
+    "red": "31",
+    "green": "32",
+    "yellow": "33",
+    "blue": "34",
+    "magenta": "35",
+    "cyan": "36",
+    "white": "37",
 }
 
+
 def _get_ansi_code(color: Optional[str] = None, style: Optional[str] = None) -> str:
     """return ansi escape code corresponding to color and style
 
@@ -488,13 +506,14 @@ def _get_ansi_code(color: Optional[str] = None, style: Optional[str] = None) ->
             ansi_code.append(ANSI_STYLES[effect])
     if color:
         if color.isdigit():
-            ansi_code.extend(['38', '5'])
+            ansi_code.extend(["38", "5"])
             ansi_code.append(color)
         else:
             ansi_code.append(ANSI_COLORS[color])
     if ansi_code:
-        return ANSI_PREFIX + ';'.join(ansi_code) + ANSI_END
-    return ''
+        return ANSI_PREFIX + ";".join(ansi_code) + ANSI_END
+    return ""
+
 
 def colorize_ansi(msg: str, color: Optional[str] = None, style: Optional[str] = None) -> str:
     """colorize message by wrapping it with ansi escape codes
@@ -522,23 +541,24 @@ def colorize_ansi(msg: str, color: Optional[str] = None, style: Optional[str] =
     escape_code = _get_ansi_code(color, style)
     # If invalid (or unknown) color, don't wrap msg with ansi codes
     if escape_code:
-        return '%s%s%s' % (escape_code, msg, ANSI_RESET)
+        return "%s%s%s" % (escape_code, msg, ANSI_RESET)
     return msg
 
-DIFF_STYLE = {'separator': 'cyan', 'remove': 'red', 'add': 'green'}
+
+DIFF_STYLE = {"separator": "cyan", "remove": "red", "add": "green"}
+
 
 def diff_colorize_ansi(lines, out=sys.stdout, style=DIFF_STYLE):
     for line in lines:
-        if line[:4] in ('--- ', '+++ '):
-            out.write(colorize_ansi(line, style['separator']))
-        elif line[0] == '-':
-            out.write(colorize_ansi(line, style['remove']))
-        elif line[0] == '+':
-            out.write(colorize_ansi(line, style['add']))
-        elif line[:4] == '--- ':
-            out.write(colorize_ansi(line, style['separator']))
-        elif line[:4] == '+++ ':
-            out.write(colorize_ansi(line, style['separator']))
+        if line[:4] in ("--- ", "+++ "):
+            out.write(colorize_ansi(line, style["separator"]))
+        elif line[:1] == "-":  # slice, not index: an empty line must not raise IndexError
+            out.write(colorize_ansi(line, style["remove"]))
+        elif line[:1] == "+":
+            out.write(colorize_ansi(line, style["add"]))
+        elif line[:4] == "--- ":  # NOTE(review): unreachable, handled by the first test
+            out.write(colorize_ansi(line, style["separator"]))
+        elif line[:4] == "+++ ":  # NOTE(review): unreachable, handled by the first test
+            out.write(colorize_ansi(line, style["separator"]))
         else:
             out.write(line)
-