Compare commits
1 Commits
230460c153
...
no_audio
Author | SHA1 | Date | |
---|---|---|---|
5e840ae8bb |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -102,4 +102,4 @@ ENV/
|
|||||||
*.swp
|
*.swp
|
||||||
*.swo
|
*.swo
|
||||||
|
|
||||||
test/
|
mcdata/
|
||||||
|
45
main.py
45
main.py
@@ -1,16 +1,14 @@
|
|||||||
import glob
|
import glob
|
||||||
import os
|
import os
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import time
|
||||||
from PIL import Image, UnidentifiedImageError
|
from PIL import Image, UnidentifiedImageError
|
||||||
|
|
||||||
import acoustid
|
|
||||||
import chromaprint
|
|
||||||
from imagehash import average_hash
|
from imagehash import average_hash
|
||||||
from videohash import VideoHash
|
from videohash import VideoHash
|
||||||
from videohash.exceptions import FFmpegFailedToExtractFrames
|
from videohash.exceptions import FFmpegFailedToExtractFrames
|
||||||
|
|
||||||
hashes = set()
|
hashes = set()
|
||||||
audio_hashes = []
|
|
||||||
delete = set()
|
delete = set()
|
||||||
|
|
||||||
filenames = [x for x in glob.glob('**', recursive=True) if os.path.isfile(x)]
|
filenames = [x for x in glob.glob('**', recursive=True) if os.path.isfile(x)]
|
||||||
@@ -23,8 +21,8 @@ except KeyboardInterrupt:
|
|||||||
print('\nCancelled.')
|
print('\nCancelled.')
|
||||||
os._exit(0)
|
os._exit(0)
|
||||||
|
|
||||||
print('Sorting file list by size...')
|
print('Sorting file list by smallest size...')
|
||||||
filenames.sort(key=os.path.getsize, reverse=True)
|
filenames.sort(key=os.path.getsize, reverse=False)
|
||||||
|
|
||||||
print('Deduplicating by md5 hash...')
|
print('Deduplicating by md5 hash...')
|
||||||
|
|
||||||
@@ -52,14 +50,6 @@ def get_image_hash(filename):
|
|||||||
except UnidentifiedImageError:
|
except UnidentifiedImageError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get_audio_hash(filename):
|
|
||||||
try:
|
|
||||||
fp = acoustid.fingerprint_file(filename)[1]
|
|
||||||
values, _ = chromaprint.decode_fingerprint(fp)
|
|
||||||
return chromaprint.hash_fingerprint(values)
|
|
||||||
except acoustid.FingerprintGenerationError:
|
|
||||||
return None
|
|
||||||
|
|
||||||
def get_video_hash(filename):
|
def get_video_hash(filename):
|
||||||
try:
|
try:
|
||||||
return str(VideoHash(path=filename).hash_hex) + '_video'
|
return str(VideoHash(path=filename).hash_hex) + '_video'
|
||||||
@@ -76,44 +66,27 @@ for filename in filenames:
|
|||||||
if filename in delete: continue
|
if filename in delete: continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
digest = get_image_hash(filename) or get_audio_hash(filename) or get_video_hash(filename)
|
digest = get_image_hash(filename) or get_video_hash(filename)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print('Skipping media hashing.')
|
print('\nCancelled.')
|
||||||
break
|
os._exit(0)
|
||||||
except BaseException as e:
|
except BaseException as e:
|
||||||
print()
|
print()
|
||||||
print('Exception', e.__class__.__name__, str(e), 'while hashing:')
|
print('Exception', e.__class__.__name__, str(e), 'while hashing:')
|
||||||
print(filename)
|
print(filename)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
time.sleep(5)
|
||||||
|
|
||||||
if not digest: continue
|
if not digest: continue
|
||||||
|
|
||||||
if type(digest) == int:
|
|
||||||
for h in audio_hashes:
|
|
||||||
if bin(digest ^ h).count('1') <= 5: # TODO adjust?
|
|
||||||
delete.add(filename)
|
|
||||||
break
|
|
||||||
else: # for
|
|
||||||
audio_hashes.append(digest)
|
|
||||||
else:
|
|
||||||
if digest in hashes:
|
if digest in hashes:
|
||||||
delete.add(filename)
|
delete.add(filename)
|
||||||
else:
|
else:
|
||||||
hashes.add(digest)
|
hashes.add(digest)
|
||||||
|
|
||||||
print()
|
print()
|
||||||
print()
|
print('Found', len(delete), 'total duplicate files.')
|
||||||
for dupe in delete:
|
|
||||||
print(dupe)
|
|
||||||
|
|
||||||
print()
|
|
||||||
print('Found', len(delete), 'total duplicate files. Delete them?')
|
|
||||||
print('ENTER to continue, ctrl+c to cancel.')
|
|
||||||
try:
|
|
||||||
input()
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
print('\nCancelled.')
|
|
||||||
os._exit(0)
|
|
||||||
|
|
||||||
print('Deleting...')
|
print('Deleting...')
|
||||||
for dupe in delete:
|
for dupe in delete:
|
||||||
|
@@ -1,19 +0,0 @@
|
|||||||
audioread==3.0.0
|
|
||||||
Brotli==1.0.9
|
|
||||||
certifi==2022.12.7
|
|
||||||
charset-normalizer==3.1.0
|
|
||||||
idna==3.4
|
|
||||||
imagedominantcolor==1.0.1
|
|
||||||
ImageHash==4.3.1
|
|
||||||
mutagen==1.46.0
|
|
||||||
numpy==1.24.2
|
|
||||||
Pillow==9.5.0
|
|
||||||
pyacoustid==1.2.2
|
|
||||||
pycryptodomex==3.17
|
|
||||||
PyWavelets==1.4.1
|
|
||||||
requests==2.28.2
|
|
||||||
scipy==1.10.1
|
|
||||||
urllib3==1.26.15
|
|
||||||
videohash==3.0.1
|
|
||||||
websockets==11.0
|
|
||||||
yt-dlp==2023.3.4
|
|
Reference in New Issue
Block a user