From c4ce8d4a136fd48d1efdfe9cf000f6d046c21d0c Mon Sep 17 00:00:00 2001 From: Andrew Kuchling Date: Wed, 19 Mar 2014 16:43:06 -0400 Subject: #14332: provide a better explanation of junk in difflib docs Initial patch by Alba Magallanes. --- Lib/difflib.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'Lib/difflib.py') diff --git a/Lib/difflib.py b/Lib/difflib.py index e8a36219d6..cc573f9827 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -853,10 +853,9 @@ class Differ: and return true iff the string is junk. The module-level function `IS_LINE_JUNK` may be used to filter out lines without visible characters, except for at most one splat ('#'). It is recommended - to leave linejunk None; as of Python 2.3, the underlying - SequenceMatcher class has grown an adaptive notion of "noise" lines - that's better than any static definition the author has ever been - able to craft. + to leave linejunk None; the underlying SequenceMatcher class has + an adaptive notion of "noise" lines that's better than any static + definition the author has ever been able to craft. - `charjunk`: A function that should accept a string of length 1. The module-level function `IS_CHARACTER_JUNK` may be used to filter out @@ -1299,17 +1298,18 @@ def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK): Compare `a` and `b` (lists of strings); return a `Differ`-style delta. Optional keyword parameters `linejunk` and `charjunk` are for filter - functions (or None): + functions, or can be None: - - linejunk: A function that should accept a single string argument, and + - linejunk: A function that should accept a single string argument and return true iff the string is junk. The default is None, and is - recommended; as of Python 2.3, an adaptive notion of "noise" lines is - used that does a good job on its own. + recommended; the underlying SequenceMatcher class has an adaptive + notion of "noise" lines. - - charjunk: A function that should accept a string of length 1. The - default is module-level function IS_CHARACTER_JUNK, which filters out - whitespace characters (a blank or tab; note: bad idea to include newline - in this!). + - charjunk: A function that accepts a character (string of length + 1), and returns true iff the character is junk. The default is + the module-level function IS_CHARACTER_JUNK, which filters out + whitespace characters (a blank or tab; note: it's a bad idea to + include newline in this!). Tools/scripts/ndiff.py is a command-line front-end to this function. @@ -1680,7 +1680,7 @@ class HtmlDiff(object): tabsize -- tab stop spacing, defaults to 8. wrapcolumn -- column number where lines are broken and wrapped, defaults to None where lines are not wrapped. - linejunk,charjunk -- keyword arguments passed into ndiff() (used to by + linejunk,charjunk -- keyword arguments passed into ndiff() (used by HtmlDiff() to generate the side by side HTML differences). See ndiff() documentation for argument default values and descriptions. """ -- cgit v1.2.1 From 6970b0ff29a0da26bdf62a4ad597bd50e705bf4c Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 3 Aug 2014 22:36:32 -0700 Subject: Use reversed() instead of creating a new temporary list. --- Lib/difflib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Lib/difflib.py') diff --git a/Lib/difflib.py b/Lib/difflib.py index 4af88a9d78..f161baafe4 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1410,7 +1410,7 @@ def _mdiff(fromlines, tolines, context=None, linejunk=None, change_re.sub(record_sub_info,markers) # process each tuple inserting our special marks that won't be # noticed by an xml/html escaper. - for key,(begin,end) in sub_info[::-1]: + for key,(begin,end) in reversed(sub_info): text = text[0:begin]+'\0'+key+text[begin:end]+'\1'+text[end:] text = text[2:] # Handle case of add/delete entire line -- cgit v1.2.1 From 29e4ba54551c47a12f4f6bbea879205dd0be8c50 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 3 Aug 2014 22:40:59 -0700 Subject: Make the import private to keep the global namespace clean. --- Lib/difflib.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Lib/difflib.py') diff --git a/Lib/difflib.py b/Lib/difflib.py index f161baafe4..9bc0d0dc01 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -30,7 +30,7 @@ __all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher', 'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff', 'unified_diff', 'HtmlDiff', 'Match'] -import heapq +from heapq import nlargest as _nlargest from collections import namedtuple as _namedtuple Match = _namedtuple('Match', 'a b size') @@ -729,7 +729,7 @@ def get_close_matches(word, possibilities, n=3, cutoff=0.6): result.append((s.ratio(), x)) # Move the best scorers to head of list - result = heapq.nlargest(n, result) + result = _nlargest(n, result) # Strip scores for the best n matches return [x for score, x in result] -- cgit v1.2.1 From 0e6b0dd07de72d42a49b0a8ac4593b9e733e82e3 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 3 Aug 2014 22:49:07 -0700 Subject: Use two-argument form of next() and use a return-statement instead of an explicit raise StopIteration --- Lib/difflib.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'Lib/difflib.py') diff --git a/Lib/difflib.py b/Lib/difflib.py index 9bc0d0dc01..ae3479d3d8 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1448,10 +1448,7 @@ def _mdiff(fromlines, tolines, context=None, linejunk=None, # are a concatenation of the first character of each of the 4 lines # so we can do some very readable comparisons. while len(lines) < 4: - try: - lines.append(next(diff_lines_iterator)) - except StopIteration: - lines.append('X') + lines.append(next(diff_lines_iterator, 'X')) s = ''.join([line[0] for line in lines]) if s.startswith('X'): # When no more lines, pump out any remaining blank lines so the @@ -1514,7 +1511,7 @@ def _mdiff(fromlines, tolines, context=None, linejunk=None, num_blanks_to_yield -= 1 yield ('','\n'),None,True if s.startswith('X'): - raise StopIteration + return else: yield from_line,to_line,True -- cgit v1.2.1