Add audio hashing

This commit is contained in:
Tanner Collin 2023-04-03 21:04:37 -06:00
parent 80fd7f844b
commit d678c8fd0f

22
main.py
View File

@ -3,6 +3,7 @@ import os
import hashlib import hashlib
from PIL import Image, UnidentifiedImageError from PIL import Image, UnidentifiedImageError
import acoustid
from imagehash import average_hash from imagehash import average_hash
from videohash import VideoHash from videohash import VideoHash
from videohash.exceptions import FFmpegFailedToExtractFrames from videohash.exceptions import FFmpegFailedToExtractFrames
@ -49,6 +50,12 @@ def get_image_hash(filename):
except UnidentifiedImageError: except UnidentifiedImageError:
return None return None
def get_audio_hash(filename):
    """Return an audio fingerprint string for *filename*, or None.

    The fingerprint is taken from index 1 of the value returned by
    ``acoustid.fingerprint_file``, decoded to text, and suffixed with
    ``'_audio'`` (the sibling video hasher uses ``'_video'`` the same way).

    Returns None when acoustid raises ``FingerprintGenerationError`` for
    the file, which lets the caller fall through to the next hasher in its
    ``or`` chain.
    """
    try:
        result = acoustid.fingerprint_file(filename)
    except acoustid.FingerprintGenerationError:
        return None

    # presumably result is (duration, fingerprint-bytes) — confirm against
    # the pyacoustid documentation.
    fingerprint = result[1].decode()
    return f'{fingerprint}_audio'
def get_video_hash(filename): def get_video_hash(filename):
try: try:
return str(VideoHash(path=filename).hash_hex) + '_video' return str(VideoHash(path=filename).hash_hex) + '_video'
@ -65,7 +72,10 @@ for filename in filenames:
if filename in delete: continue if filename in delete: continue
try: try:
digest = get_image_hash(filename) or get_video_hash(filename) digest = get_image_hash(filename) or get_audio_hash(filename) or get_video_hash(filename)
except KeyboardInterrupt:
print('Skipping media hashing.')
break
except BaseException as e: except BaseException as e:
print() print()
print('Exception', e.__class__.__name__, str(e), 'while hashing:') print('Exception', e.__class__.__name__, str(e), 'while hashing:')
@ -79,8 +89,16 @@ for filename in filenames:
else: else:
hashes.add(digest) hashes.add(digest)
for dupe in delete:
print(dupe)
print() print()
print('Found', len(delete), 'total duplicate files.') print('Found', len(delete), 'total duplicate files. Delete them?')
print('ENTER to continue, ctrl+c to cancel.')
try:
input()
except KeyboardInterrupt:
print('\nCancelled.')
os._exit(0)
print('Deleting...') print('Deleting...')
for dupe in delete: for dupe in delete: