diff options
author | willmcgugan <willmcgugan@67cdc799-7952-0410-af00-57a81ceafa0f> | 2009-11-23 14:50:19 +0000 |
---|---|---|
committer | willmcgugan <willmcgugan@67cdc799-7952-0410-af00-57a81ceafa0f> | 2009-11-23 14:50:19 +0000 |
commit | 4d59bcdeca2312d327c14f484b44cabfd9979c1b (patch) | |
tree | 15acbbccd04cad2329a1fdc763e5042f3278a910 /fs/utils.py | |
parent | 11b9f16a30f3ec4b6cacfe24ebaa83bb5ddf714e (diff) | |
download | pyfilesystem-git-4d59bcdeca2312d327c14f484b44cabfd9979c1b.tar.gz |
Modified signature generation to only read a portion of the beginning of the file, which should be enough to indicate a potential duplicate.
Diffstat (limited to 'fs/utils.py')
-rw-r--r-- | fs/utils.py | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/fs/utils.py b/fs/utils.py
index 27cb612..fb1ffbb 100644
--- a/fs/utils.py
+++ b/fs/utils.py
@@ -172,7 +172,7 @@ def find_duplicates(fs, compare_paths=None, quick=False, signature_size=16384):
     signatures = defaultdict(list)
 
-    # A signature is a tuple of CRC32s for each 16K of the file
+    # A signature is a tuple of CRC32s for each 4x16K of the file
     # This allows us to find potential duplicates with a dictionary lookup
     for paths in size_duplicates:
         for path in paths:
@@ -180,7 +180,7 @@ def find_duplicates(fs, compare_paths=None, quick=False, signature_size=16384):
             fread = None
             try:
                 fread = fs.open(path, 'rb')
-                while True:
+                while len(signature) < 4:
                     data = fread.read(signature_size)
                     if not data:
                         break