Compare commits
3 Commits
80fd7f844b
...
230460c153
Author | SHA1 | Date | |
---|---|---|---|
230460c153 | |||
dddfbb0724 | |||
d678c8fd0f |
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -102,4 +102,4 @@ ENV/
|
|||
*.swp
|
||||
*.swo
|
||||
|
||||
mcdata/
|
||||
test/
|
||||
|
|
37
main.py
37
main.py
|
@ -3,11 +3,14 @@ import os
|
|||
import hashlib
|
||||
from PIL import Image, UnidentifiedImageError
|
||||
|
||||
import acoustid
|
||||
import chromaprint
|
||||
from imagehash import average_hash
|
||||
from videohash import VideoHash
|
||||
from videohash.exceptions import FFmpegFailedToExtractFrames
|
||||
|
||||
hashes = set()
|
||||
audio_hashes = []
|
||||
delete = set()
|
||||
|
||||
filenames = [x for x in glob.glob('**', recursive=True) if os.path.isfile(x)]
|
||||
|
@ -49,6 +52,14 @@ def get_image_hash(filename):
|
|||
except UnidentifiedImageError:
|
||||
return None
|
||||
|
||||
def get_audio_hash(filename):
    """Return a Chromaprint-based hash for the audio file *filename*.

    The file is fingerprinted with ``acoustid.fingerprint_file``; the
    second element of its result is decoded with
    ``chromaprint.decode_fingerprint`` and the decoded values are reduced
    to a single hash by ``chromaprint.hash_fingerprint``.

    Returns the value produced by ``chromaprint.hash_fingerprint``, or
    ``None`` when ``acoustid.FingerprintGenerationError`` is raised (for
    example because the file could not be fingerprinted as audio).
    """
    try:
        fingerprint_result = acoustid.fingerprint_file(filename)
        # Index 1 holds the encoded fingerprint; index 0 is not used here.
        encoded_fingerprint = fingerprint_result[1]
        raw_values, _algorithm = chromaprint.decode_fingerprint(encoded_fingerprint)
        audio_hash = chromaprint.hash_fingerprint(raw_values)
    except acoustid.FingerprintGenerationError:
        return None
    return audio_hash
|
||||
|
||||
def get_video_hash(filename):
|
||||
try:
|
||||
return str(VideoHash(path=filename).hash_hex) + '_video'
|
||||
|
@ -65,7 +76,10 @@ for filename in filenames:
|
|||
if filename in delete: continue
|
||||
|
||||
try:
|
||||
digest = get_image_hash(filename) or get_video_hash(filename)
|
||||
digest = get_image_hash(filename) or get_audio_hash(filename) or get_video_hash(filename)
|
||||
except KeyboardInterrupt:
|
||||
print('Skipping media hashing.')
|
||||
break
|
||||
except BaseException as e:
|
||||
print()
|
||||
print('Exception', e.__class__.__name__, str(e), 'while hashing:')
|
||||
|
@ -74,13 +88,32 @@ for filename in filenames:
|
|||
|
||||
if not digest: continue
|
||||
|
||||
if type(digest) == int:
|
||||
for h in audio_hashes:
|
||||
if bin(digest ^ h).count('1') <= 5: # TODO adjust?
|
||||
delete.add(filename)
|
||||
break
|
||||
else: # for
|
||||
audio_hashes.append(digest)
|
||||
else:
|
||||
if digest in hashes:
|
||||
delete.add(filename)
|
||||
else:
|
||||
hashes.add(digest)
|
||||
|
||||
print()
|
||||
print('Found', len(delete), 'total duplicate files.')
|
||||
print()
|
||||
for dupe in delete:
|
||||
print(dupe)
|
||||
|
||||
print()
|
||||
print('Found', len(delete), 'total duplicate files. Delete them?')
|
||||
print('ENTER to continue, ctrl+c to cancel.')
|
||||
try:
|
||||
input()
|
||||
except KeyboardInterrupt:
|
||||
print('\nCancelled.')
|
||||
os._exit(0)
|
||||
|
||||
print('Deleting...')
|
||||
for dupe in delete:
|
||||
|
|
19
requirements.txt
Normal file
19
requirements.txt
Normal file
|
@ -0,0 +1,19 @@
|
|||
audioread==3.0.0
|
||||
Brotli==1.0.9
|
||||
certifi==2022.12.7
|
||||
charset-normalizer==3.1.0
|
||||
idna==3.4
|
||||
imagedominantcolor==1.0.1
|
||||
ImageHash==4.3.1
|
||||
mutagen==1.46.0
|
||||
numpy==1.24.2
|
||||
Pillow==9.5.0
|
||||
pyacoustid==1.2.2
|
||||
pycryptodomex==3.17
|
||||
PyWavelets==1.4.1
|
||||
requests==2.28.2
|
||||
scipy==1.10.1
|
||||
urllib3==1.26.15
|
||||
videohash==3.0.1
|
||||
websockets==11.0
|
||||
yt-dlp==2023.3.4
|
Loading…
Reference in New Issue
Block a user