diff options
author | Sylvain <syt@logilab.fr> | 2006-12-11 17:09:20 +0100 |
---|---|---|
committer | Sylvain <syt@logilab.fr> | 2006-12-11 17:09:20 +0100 |
commit | 7fbbf440b0e04358d2cc8bf6a3d9f01ccc54adff (patch) | |
tree | e9249ea25454a08b5b3f55e1ed721e7319fd637f /textutils.py | |
parent | 233b46c5c6764b6068bd31609eb282207b612435 (diff) | |
download | logilab-common-7fbbf440b0e04358d2cc8bf6a3d9f01ccc54adff.tar.gz |
textutils: new 'rest' argument to normalize_text to better deal with
ReST-formatted text
Diffstat (limited to 'textutils.py')
-rw-r--r-- | textutils.py | 78 |
1 files changed, 60 insertions, 18 deletions
diff --git a/textutils.py b/textutils.py index c319d7f..c26f932 100644 --- a/textutils.py +++ b/textutils.py @@ -71,7 +71,7 @@ def unquote(string): _BLANKLINES_RGX = re.compile('\r?\n\r?\n') _NORM_SPACES_RGX = re.compile('\s+') -def normalize_text(text, line_len=80, indent=''): +def normalize_text(text, line_len=80, indent='', rest=False): """normalize a text to display it with a maximum line size and optionally arbitrary indentation. Line jumps are normalized but blank lines are kept. The indentation string may be used to insert a @@ -92,19 +92,22 @@ def normalize_text(text, line_len=80, indent=''): inferior to `line_len`, and optionally prefixed by an indentation string """ + if rest: + normp = normalize_rest_paragraph + else: + normp = normalize_paragraph result = [] for text in _BLANKLINES_RGX.split(text): - result.append(normalize_paragraph(text, line_len, indent)) -## return ('%s%s%s' % (linesep, indent, linesep)).join(result) + result.append(normp(text, line_len, indent)) return ('%s%s' % (linesep, linesep)).join(result) + def normalize_paragraph(text, line_len=80, indent=''): """normalize a text to display it with a maximum line size and optionaly arbitrary indentation. Line jumps are normalized. The indentation string may be used top insert a comment mark for instance. 
- :type text: str or unicode :param text: the input text to normalize @@ -120,25 +123,64 @@ def normalize_paragraph(text, line_len=80, indent=''): inferior to `line_len`, and optionally prefixed by an indentation string """ - #text = text.replace(linesep, ' ') text = _NORM_SPACES_RGX.sub(' ', text) lines = [] while text: - text = text.strip() - pos = min(len(indent) + len(text), line_len) - if pos == line_len and len(text) > line_len: - pos = pos - len(indent) - while pos > 0 and text[pos] != ' ': - pos -= 1 - if pos == 0: - pos = min(len(indent) + len(text), line_len) - pos = pos - len(indent) - while len(text) > pos and text[pos] != ' ': - pos += 1 - lines.append((indent + text[:pos])) - text = text[pos+1:] + aline, text = splittext(indent + text.strip(), line_len) + lines.append(aline) + return linesep.join(lines) + +def normalize_rest_paragraph(text, line_len=80, indent=''): + """normalize a ReST text to display it with a maximum line size and + optionaly arbitrary indentation. Line jumps are normalized. The + indentation string may be used top insert a comment mark for + instance. 
+ + :type text: str or unicode + :param text: the input text to normalize + + :type line_len: int + :param line_len: expected maximum line's length, default to 80 + + :type indent: str or unicode + :param indent: optional string to use as indentation + + :rtype: str or unicode + :return: + the input text normalized to fit on lines with a maximized size + inferior to `line_len`, and optionally prefixed by an + indentation string + """ + toreport = '' + lines = [] + for line in text.splitlines(): + line = indent + toreport + _NORM_SPACES_RGX.sub(' ', line.strip()) + toreport = '' + if len(line) > line_len: + # too long line, need split + line, toreport = splittext(line, line_len) + toreport += ' ' + lines.append(line) return linesep.join(lines) +def splittext(text, line_len): + """split the given text on space according to the given max line size + + return a 2-uple: + * a line <= line_len if possible + * the rest of the text which has to be reported on another line + """ + if len(text) <= line_len: + return text, '' + pos = min(len(text)-1, line_len) + while pos > 0 and text[pos] != ' ': + pos -= 1 + if pos == 0: + pos = min(len(text), line_len) + while len(text) > pos and text[pos] != ' ': + pos += 1 + return text[:pos], text[pos+1:] + def get_csv(string, sep=','): """return a list of string in from a csv formatted line |