From d678c8fd0ff14bee4d3de4addd3456282d704c78 Mon Sep 17 00:00:00 2001
From: Tanner Collin
Date: Mon, 3 Apr 2023 21:04:37 -0600
Subject: [PATCH] Add audio hashing

---
Notes (this area is dropped by git am and is not part of the commit message):

* Adds get_audio_hash(filename). It returns element [1] of
  acoustid.fingerprint_file(filename), decoded to str and suffixed with
  '_audio', or None when acoustid.FingerprintGenerationError is raised.
* The digest lookup now tries image, then audio, then video hashing.
* Ctrl+C while hashing prints 'Skipping media hashing.' and leaves the loop
  instead of reaching the generic BaseException handler.
* Every entry in `delete` is printed and the script waits for ENTER before
  deleting; Ctrl+C at that prompt prints 'Cancelled.' and calls os._exit(0).
* NOTE(review): line breaks, blank lines and indentation were restored from a
  copy of this patch that had been collapsed onto a single line. They were
  inferred from the hunk line counts and the diffstat; confirm them against
  main.py at f949256 before applying.

 main.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/main.py b/main.py
index f949256..6055557 100644
--- a/main.py
+++ b/main.py
@@ -3,6 +3,7 @@
 import os
 import hashlib
 from PIL import Image, UnidentifiedImageError
+import acoustid
 from imagehash import average_hash
 from videohash import VideoHash
 from videohash.exceptions import FFmpegFailedToExtractFrames
@@ -49,6 +50,12 @@ def get_image_hash(filename):
     except UnidentifiedImageError:
         return None
 
+def get_audio_hash(filename):
+    try:
+        return str(acoustid.fingerprint_file(filename)[1].decode()) + '_audio'
+    except acoustid.FingerprintGenerationError:
+        return None
+
 def get_video_hash(filename):
     try:
         return str(VideoHash(path=filename).hash_hex) + '_video'
@@ -65,7 +72,10 @@ for filename in filenames:
     if filename in delete:
         continue
     try:
-        digest = get_image_hash(filename) or get_video_hash(filename)
+        digest = get_image_hash(filename) or get_audio_hash(filename) or get_video_hash(filename)
+    except KeyboardInterrupt:
+        print('Skipping media hashing.')
+        break
     except BaseException as e:
         print()
         print('Exception', e.__class__.__name__, str(e), 'while hashing:')
@@ -79,8 +89,16 @@ for filename in filenames:
     else:
         hashes.add(digest)
 
+for dupe in delete:
+    print(dupe)
 print()
-print('Found', len(delete), 'total duplicate files.')
+print('Found', len(delete), 'total duplicate files. Delete them?')
+print('ENTER to continue, ctrl+c to cancel.')
+try:
+    input()
+except KeyboardInterrupt:
+    print('\nCancelled.')
+    os._exit(0)
 
 print('Deleting...')
 for dupe in delete: