|
|
|
@ -4,11 +4,13 @@ import hashlib |
|
|
|
|
from PIL import Image, UnidentifiedImageError |
|
|
|
|
|
|
|
|
|
import acoustid |
|
|
|
|
import chromaprint |
|
|
|
|
from imagehash import average_hash |
|
|
|
|
from videohash import VideoHash |
|
|
|
|
from videohash.exceptions import FFmpegFailedToExtractFrames |
|
|
|
|
|
|
|
|
|
# Digests already seen for files that are compared by exact equality.
hashes = set()
# Integer audio digests seen so far; kept in a list because they are matched
# by counting differing bits against each earlier entry, not by equality.
audio_hashes = []
# Paths of the files that have been flagged as duplicates.
delete = set()

# Every path matched by a recursive glob from the current working directory
# that refers to a regular file (directories are filtered out).
filenames = [x for x in glob.glob('**', recursive=True) if os.path.isfile(x)]
|
|
|
@ -52,7 +54,9 @@ def get_image_hash(filename): |
|
|
|
|
|
|
|
|
|
def get_audio_hash(filename):
    """Return an integer hash of the audio content of *filename*.

    The file is fingerprinted with ``acoustid.fingerprint_file``; the encoded
    fingerprint is decoded with ``chromaprint.decode_fingerprint`` and reduced
    to a single value by ``chromaprint.hash_fingerprint``. The result is meant
    to be compared bit-by-bit with other audio hashes rather than by exact
    equality.

    Returns ``None`` when acoustid raises ``FingerprintGenerationError``.
    """
    try:
        # fingerprint_file returns a pair; element 1 is the encoded fingerprint.
        fp = acoustid.fingerprint_file(filename)[1]
    except acoustid.FingerprintGenerationError:
        return None

    # The second element of the decode result is not needed here.
    values, _ = chromaprint.decode_fingerprint(fp)
    return chromaprint.hash_fingerprint(values)
|
|
|
|
|
|
|
|
@ -84,13 +88,24 @@ for filename in filenames: |
|
|
|
|
|
|
|
|
|
if not digest: continue |
|
|
|
|
|
|
|
|
|
if digest in hashes: |
|
|
|
|
delete.add(filename) |
|
|
|
|
if type(digest) == int: |
|
|
|
|
for h in audio_hashes: |
|
|
|
|
if bin(digest ^ h).count('1') <= 5: # TODO adjust? |
|
|
|
|
delete.add(filename) |
|
|
|
|
break |
|
|
|
|
else: # for |
|
|
|
|
audio_hashes.append(digest) |
|
|
|
|
else: |
|
|
|
|
hashes.add(digest) |
|
|
|
|
if digest in hashes: |
|
|
|
|
delete.add(filename) |
|
|
|
|
else: |
|
|
|
|
hashes.add(digest) |
|
|
|
|
|
|
|
|
|
print() |
|
|
|
|
print() |
|
|
|
|
for dupe in delete: |
|
|
|
|
print(dupe) |
|
|
|
|
|
|
|
|
|
print() |
|
|
|
|
print('Found', len(delete), 'total duplicate files. Delete them?') |
|
|
|
|
print('ENTER to continue, ctrl+c to cancel.') |
|
|
|
|