# Copyright (c) 2003-2005 LOGILAB S.A. (Paris, FRANCE). # http://www.logilab.fr/ -- mailto:contact@logilab.fr # # This program is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any later # version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along with # this program; if not, write to the Free Software Foundation, Inc., # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """Some text manipulation utility functions. :version: $Revision: 1.25 $ :author: Logilab :copyright: 2003-2005 LOGILAB S.A. (Paris, FRANCE) :contact: http://www.logilab.fr/ -- mailto:python-projects@logilab.org :group text formatting: normalize_text, normalize_paragraph, pretty_match,\ unquote, colorize_ansi :group text manipulation: searchall, get_csv :sort: text formatting, text manipulation :type ANSI_STYLES: dict(str) :var ANSI_STYLES: dictionary mapping style identifier to ANSI terminal code :type ANSI_COLORS: dict(str) :var ANSI_COLORS: dictionary mapping color identifier to ANSI terminal code :type ANSI_PREFIX: str :var ANSI_PREFIX: ANSI terminal code notifing the start of an ANSI escape sequence :type ANSI_END: str :var ANSI_END: ANSI terminal code notifing the end of an ANSI escape sequence :type ANSI_RESET: str :var ANSI_RESET: ANSI terminal code reseting format defined by a previous ANSI escape sequence """ __revision__ = "$Id: textutils.py,v 1.25 2005-09-06 08:51:01 alf Exp $" __docformat__ = "restructuredtext en" import re from os import linesep from warnings import warn def searchall(rgx, data): """apply a regexp using "search" until no more match is found This function is deprecated, use re.finditer() instead. """ warn('logilab.common.textutils.searchall() is deprecated, use ' 're.finditer() instead', DeprecationWarning) result = [] match = rgx.search(data) while match is not None: result.append(match) match = rgx.search(data, match.end()) return result def unquote(string): """remove optional quotes (simple or double) from the string :type string: str or unicode :param string: an optionaly quoted string :rtype: str or unicode :return: the unquoted string (or the input string if it wasn't quoted) """ if not string: return string if string[0] in '"\'': string = string[1:] if string[-1] in '"\'': string = string[:-1] return string _BLANKLINES_RGX = re.compile('\r?\n\r?\n') _NORM_SPACES_RGX = re.compile('\s+') def normalize_text(text, line_len=80, indent=''): """normalize a text to display it with a maximum line size and optionally arbitrary indentation. Line jumps are normalized but blank lines are kept. The indentation string may be used to insert a comment (#) or a quoting (>) mark for instance. :type text: str or unicode :param text: the input text to normalize :type line_len: int :param line_len: expected maximum line's length, default to 80 :type indent: str or unicode :param indent: optional string to use as indentation :rtype: str or unicode :return: the input text normalized to fit on lines with a maximized size inferior to `line_len`, and optionally prefixed by an indentation string """ result = [] for text in _BLANKLINES_RGX.split(text): result.append(normalize_paragraph(text, line_len, indent)) ## return ('%s%s%s' % (linesep, indent, linesep)).join(result) return ('%s%s' % (linesep, linesep)).join(result) def normalize_paragraph(text, line_len=80, indent=''): """normalize a text to display it with a maximum line size and optionaly arbitrary indentation. Line jumps are normalized. The indentation string may be used top insert a comment mark for instance. :type text: str or unicode :param text: the input text to normalize :type line_len: int :param line_len: expected maximum line's length, default to 80 :type indent: str or unicode :param indent: optional string to use as indentation :rtype: str or unicode :return: the input text normalized to fit on lines with a maximized size inferior to `line_len`, and optionally prefixed by an indentation string """ #text = text.replace(linesep, ' ') text = _NORM_SPACES_RGX.sub(' ', text) lines = [] while text: text = text.strip() pos = min(len(indent) + len(text), line_len) if pos == line_len and len(text) > line_len: pos = pos - len(indent) while pos > 0 and text[pos] != ' ': pos -= 1 if pos == 0: pos = min(len(indent) + len(text), line_len) pos = pos - len(indent) while text[pos] != ' ': pos += 1 lines.append((indent + text[:pos])) text = text[pos+1:] return linesep.join(lines) def get_csv(string, sep=','): """return a list of string in from a csv formatted line >>> get_csv('a, b, c , 4') ['a', 'b', 'c', '4'] >>> get_csv('a') ['a'] >>> :type string: str or unicode :param string: a csv line :type sep: str or unicode :param sep: field separator, default to the comma (',') :rtype: str or unicode :return: the unquoted string (or the input string if it wasn't quoted) """ return [word.strip() for word in string.split(sep) if word.strip()] _LINE_RGX = re.compile('\r\n|\r+|\n') def pretty_match(match, string, underline_char='^'): """return a string with the match location underlined: >>> import re >>> print pretty_match(re.search('mange', 'il mange du bacon'), 'il mange du bacon') il mange du bacon ^^^^^ >>> :type match: _sre.SRE_match :param match: object returned by re.match, re.search or re.finditer :type string: str or unicode :param string: the string on which the regular expression has been applied to obtain the `match` object :type underline_char: str or unicode :param underline_char: character to use to underline the matched section, default to the carret '^' :rtype: str or unicode :return: the original string with an inserted line to underline the match location """ start = match.start() end = match.end() string = _LINE_RGX.sub(linesep, string) start_line_pos = string.rfind(linesep, 0, start) if start_line_pos == -1: start_line_pos = 0 result = [] else: result = [string[:start_line_pos]] start_line_pos += len(linesep) offset = start - start_line_pos underline = ' ' * offset + underline_char * (end - start) end_line_pos = string.find(linesep, end) if end_line_pos == -1: string = string[start_line_pos:] result.append(string) result.append(underline) else: end = string[end_line_pos + len(linesep):] string = string[start_line_pos:end_line_pos] result.append(string) result.append(underline) result.append(end) return linesep.join(result).rstrip() # Ansi colorization ########################################################### ANSI_PREFIX = '\033[' ANSI_END = 'm' ANSI_RESET = '\033[0m' ANSI_STYLES = { 'reset' : "0", 'bold' : "1", 'italic' : "3", 'underline' : "4", 'blink' : "5", 'inverse' : "7", 'strike' : "9", } ANSI_COLORS = { 'reset' : "0", 'black' : "30", 'red' : "31", 'green' : "32", 'yellow' : "33", 'blue' : "34", 'magenta' : "35", 'cyan' : "36", 'white' : "37", } def _get_ansi_code(color=None, style=None): """return ansi escape code corresponding to color and style :type color: str or None :param color: the color identifier (see `ANSI_COLORS` for available values) :type style: str or None :param style: style string (see `ANSI_COLORS` for available values). To get several style effects at the same time, use a coma as separator. :raise KeyError: if an unexistant color or style identifier is given :rtype: str :return: the built escape code """ ansi_code = [] if style: style_attrs = get_csv(style) for effect in style_attrs: ansi_code.append(ANSI_STYLES[effect]) if color: ansi_code.append(ANSI_COLORS[color]) if ansi_code: return ANSI_PREFIX + ';'.join(ansi_code) + ANSI_END return '' def colorize_ansi(msg, color=None, style=None): """colorize message by wrapping it with ansi escape codes :type msg: str or unicode :param msg: the message string to colorize :type color: str or None :param color: the color identifier (see `ANSI_COLORS` for available values) :type style: str or None :param style: style string (see `ANSI_COLORS` for available values). To get several style effects at the same time, use a coma as separator. :raise KeyError: if an unexistant color or style identifier is given :rtype: str or unicode :return: the ansi escaped string """ # If both color and style are not defined, then leave the text as is if color is None and style is None: return msg escape_code = _get_ansi_code(color, style) # If invalid (or unknown) color, don't wrap msg with ansi codes if escape_code: return '%s%s%s' % (escape_code, msg, ANSI_RESET) return msg