Compare bits on audio hashes

This commit is contained in:
Tanner Collin 2023-04-03 22:11:34 -06:00
parent d678c8fd0f
commit dddfbb0724

23
main.py
View File

@ -4,11 +4,13 @@ import hashlib
from PIL import Image, UnidentifiedImageError
import acoustid
import chromaprint
from imagehash import average_hash
from videohash import VideoHash
from videohash.exceptions import FFmpegFailedToExtractFrames
# Digests already seen; later files are flagged when their digest is found in this set.
hashes = set()
# Integer audio hashes already seen; these are compared by differing-bit count
# rather than by exact equality, so they are kept in a list that is scanned.
audio_hashes = []
# Filenames flagged as duplicates.
delete = set()
# Every path matched by a recursive '**' glob that is a regular file.
filenames = [x for x in glob.glob('**', recursive=True) if os.path.isfile(x)]
@ -52,7 +54,9 @@ def get_image_hash(filename):
def get_audio_hash(filename):
    """Return a compact integer hash of the audio in *filename*.

    The file is fingerprinted with acoustid, the encoded fingerprint is
    decoded with chromaprint, and the decoded values are reduced to a single
    integer with ``chromaprint.hash_fingerprint``. Returning an int (instead
    of the previous ``'<fingerprint>_audio'`` string) is what allows the
    caller to compare two audio hashes bit by bit.

    Returns None when acoustid cannot generate a fingerprint for the file.
    """
    try:
        # fingerprint_file returns a tuple; element 1 is the encoded fingerprint.
        fp = acoustid.fingerprint_file(filename)[1]
        # decode_fingerprint returns (values, algorithm); only values are needed.
        values, _ = chromaprint.decode_fingerprint(fp)
        return chromaprint.hash_fingerprint(values)
    except acoustid.FingerprintGenerationError:
        return None
@ -84,13 +88,24 @@ for filename in filenames:
# NOTE(review): this fragment comes from a flattened diff hunk; indentation and
# +/- markers are missing, so lines from the old and new versions of the loop
# body appear interleaved. Confirm the real nesting against main.py.
# Skip files for which no digest was produced.
if not digest: continue
if digest in hashes:
delete.add(filename)
# Integer digests are audio hashes and get a fuzzy comparison.
if type(digest) == int:
for h in audio_hashes:
# XOR leaves a 1 bit wherever the two hashes differ; five or fewer
# differing bits is treated as a duplicate.
if bin(digest ^ h).count('1') <= 5: # TODO adjust?
delete.add(filename)
break
# for/else: reached only when the loop finished without `break`, i.e. no
# previously seen audio hash was close enough, so this one is remembered.
else: # for
audio_hashes.append(digest)
# Non-integer digests are matched by exact membership in `hashes`.
else:
hashes.add(digest)
if digest in hashes:
delete.add(filename)
else:
hashes.add(digest)
# Print blank lines as a separator, then list every flagged duplicate.
print()
print()
for dupe in delete:
print(dupe)
print()
# Show the total and prompt the user before continuing.
print('Found', len(delete), 'total duplicate files. Delete them?')
print('ENTER to continue, ctrl+c to cancel.')