summaryrefslogtreecommitdiff
path: root/fs/utils.py
diff options
context:
space:
mode:
authorwillmcgugan <willmcgugan@67cdc799-7952-0410-af00-57a81ceafa0f>2009-11-23 14:50:19 +0000
committerwillmcgugan <willmcgugan@67cdc799-7952-0410-af00-57a81ceafa0f>2009-11-23 14:50:19 +0000
commit4d59bcdeca2312d327c14f484b44cabfd9979c1b (patch)
tree15acbbccd04cad2329a1fdc763e5042f3278a910 /fs/utils.py
parent11b9f16a30f3ec4b6cacfe24ebaa83bb5ddf714e (diff)
downloadpyfilesystem-git-4d59bcdeca2312d327c14f484b44cabfd9979c1b.tar.gz
Modified signature generation to read only a portion of the beginning of the file, which should be enough to indicate a potential duplicate.
Diffstat (limited to 'fs/utils.py')
-rw-r--r--fs/utils.py4
1 files changed, 2 insertions, 2 deletions
diff --git a/fs/utils.py b/fs/utils.py
index 27cb612..fb1ffbb 100644
--- a/fs/utils.py
+++ b/fs/utils.py
@@ -172,7 +172,7 @@ def find_duplicates(fs, compare_paths=None, quick=False, signature_size=16384):
signatures = defaultdict(list)
- # A signature is a tuple of CRC32s for each 16K of the file
+ # A signature is a tuple of CRC32s for each 4x16K of the file
# This allows us to find potential duplicates with a dictionary lookup
for paths in size_duplicates:
for path in paths:
@@ -180,7 +180,7 @@ def find_duplicates(fs, compare_paths=None, quick=False, signature_size=16384):
fread = None
try:
fread = fs.open(path, 'rb')
- while True:
+ while len(signature) < 4:
data = fread.read(signature_size)
if not data:
break