Compare bits on audio hashes
This commit is contained in:
parent
d678c8fd0f
commit
dddfbb0724
17
main.py
17
main.py
|
@@ -4,11 +4,13 @@ import hashlib
|
|||
from PIL import Image, UnidentifiedImageError
|
||||
|
||||
import acoustid
|
||||
import chromaprint
|
||||
from imagehash import average_hash
|
||||
from videohash import VideoHash
|
||||
from videohash.exceptions import FFmpegFailedToExtractFrames
|
||||
|
||||
# Digests compared by exact equality (the non-integer digests).
hashes = set()
# Integer audio digests, compared by counting differing bits.
audio_hashes = []
# Paths flagged as duplicates.
delete = set()

# Every regular file matched by a recursive glob from the working directory.
filenames = list(filter(os.path.isfile, glob.glob('**', recursive=True)))
|
||||
|
@@ -52,7 +54,9 @@ def get_image_hash(filename):
|
|||
|
||||
def get_audio_hash(filename):
    """Return an integer hash of the audio fingerprint of *filename*.

    The file is fingerprinted with ``acoustid.fingerprint_file``; the
    encoded fingerprint (second element of its result) is decoded with
    ``chromaprint.decode_fingerprint`` and reduced to a single integer by
    ``chromaprint.hash_fingerprint``. An integer lets the caller compare
    two files by counting the bits in which their hashes differ instead of
    requiring an exact match.

    Returns ``None`` when acoustid raises ``FingerprintGenerationError``
    for the file.
    """
    try:
        fp = acoustid.fingerprint_file(filename)[1]
    except acoustid.FingerprintGenerationError:
        return None

    # decode_fingerprint returns the raw values plus an algorithm id; only
    # the raw values are needed for hashing.
    values, _ = chromaprint.decode_fingerprint(fp)
    return chromaprint.hash_fingerprint(values)
|
||||
|
||||
|
@@ -84,13 +88,24 @@ for filename in filenames:
|
|||
|
||||
if not digest: continue
|
||||
|
||||
if isinstance(digest, int):
    # Integer digests are audio hashes: treat the file as a duplicate when
    # its hash differs from an already-seen one in at most 5 bits.
    # TODO adjust?
    if any(bin(digest ^ h).count('1') <= 5 for h in audio_hashes):
        delete.add(filename)
    else:
        # No near match: remember this hash for later comparisons.
        audio_hashes.append(digest)
elif digest in hashes:
    # All other digests must match exactly to count as a duplicate.
    delete.add(filename)
else:
    hashes.add(digest)
|
||||
|
||||
# Emit two empty lines, then list every path flagged as a duplicate.
for _ in range(2):
    print()
for path in delete:
    print(path)

# Summary and confirmation prompt.
duplicate_count = len(delete)
print()
print('Found', duplicate_count, 'total duplicate files. Delete them?')
print('ENTER to continue, ctrl+c to cancel.')
|
||||
|
|
Loading…
Reference in New Issue
Block a user