From dddfbb07247801b4bbcead7446362ef37131cf12 Mon Sep 17 00:00:00 2001
From: Tanner Collin
Date: Mon, 3 Apr 2023 22:11:34 -0600
Subject: [PATCH] Compare bits on audio hashes

---
Notes:
    Review commentary only; text in this section is not part of the commit.

    What the patch changes, per the hunks below:
    * get_audio_hash() no longer returns the decoded fingerprint string with
      an '_audio' suffix. It now passes the fingerprint to
      chromaprint.decode_fingerprint() and returns
      chromaprint.hash_fingerprint(values).
    * The main loop branches on type(digest) == int. Such digests are XORed
      against every entry of the new audio_hashes list; the file is added to
      `delete` when at most 5 bits differ (threshold is marked TODO in the
      code), otherwise the digest is appended to audio_hashes via the
      for/else clause.
    * All other digests keep the previous exact-membership check against the
      `hashes` set.
    * Two print() calls are added before the duplicate listing.

    NOTE(review): the unchanged `if not digest: continue` guard runs before
    the int branch, so an int digest equal to 0 would be skipped -- confirm
    whether hash_fingerprint() can return 0.
    NOTE(review): only acoustid.FingerprintGenerationError is caught in
    get_audio_hash() -- confirm whether the two chromaprint calls can raise
    anything else.

 main.py | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/main.py b/main.py
index 6055557..0ec2daa 100644
--- a/main.py
+++ b/main.py
@@ -4,11 +4,13 @@ import hashlib
 
 from PIL import Image, UnidentifiedImageError
 import acoustid
+import chromaprint
 from imagehash import average_hash
 from videohash import VideoHash
 from videohash.exceptions import FFmpegFailedToExtractFrames
 
 hashes = set()
+audio_hashes = []
 delete = set()
 
 filenames = [x for x in glob.glob('**', recursive=True) if os.path.isfile(x)]
@@ -52,7 +54,9 @@ def get_image_hash(filename):
 
 def get_audio_hash(filename):
     try:
-        return str(acoustid.fingerprint_file(filename)[1].decode()) + '_audio'
+        fp = acoustid.fingerprint_file(filename)[1]
+        values, _ = chromaprint.decode_fingerprint(fp)
+        return chromaprint.hash_fingerprint(values)
     except acoustid.FingerprintGenerationError:
         return None
 
@@ -84,13 +88,24 @@ for filename in filenames:
     if not digest:
         continue
 
-    if digest in hashes:
-        delete.add(filename)
+    if type(digest) == int:
+        for h in audio_hashes:
+            if bin(digest ^ h).count('1') <= 5: # TODO adjust?
+                delete.add(filename)
+                break
+        else: # for
+            audio_hashes.append(digest)
     else:
-        hashes.add(digest)
+        if digest in hashes:
+            delete.add(filename)
+        else:
+            hashes.add(digest)
 
+print()
+print()
 for dupe in delete:
     print(dupe)
+
 print()
 print('Found', len(delete), 'total duplicate files. Delete them?')
 print('ENTER to continue, ctrl+c to cancel.')