Compare commits
	
		
			4 Commits
		
	
	
		
			no_audio
			...
			c226c3807a
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| | c226c3807a | | |
| | 230460c153 | | |
| | dddfbb0724 | | |
| | d678c8fd0f | | |
							
								
								
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -102,4 +102,4 @@ ENV/ | ||||
| *.swp | ||||
| *.swo | ||||
|  | ||||
| mcdata/ | ||||
| test/ | ||||
|   | ||||
							
								
								
									
										48
									
								
								main.py
									
									
									
									
									
								
							
							
						
						
									
										48
									
								
								main.py
									
									
									
									
									
								
							| @@ -3,12 +3,16 @@ import os | ||||
| import hashlib | ||||
| from PIL import Image, UnidentifiedImageError | ||||
|  | ||||
| import acoustid | ||||
| import chromaprint | ||||
| from imagehash import average_hash | ||||
| from videohash import VideoHash | ||||
| from videohash.exceptions import FFmpegFailedToExtractFrames | ||||
|  | ||||
| hashes = set() | ||||
| audio_hashes = [] | ||||
| delete = set() | ||||
| hash_lookup = {} | ||||
|  | ||||
| filenames = [x for x in glob.glob('**', recursive=True) if os.path.isfile(x)] | ||||
|  | ||||
| @@ -49,6 +53,14 @@ def get_image_hash(filename): | ||||
|     except UnidentifiedImageError: | ||||
|         return None | ||||
|  | ||||
| def get_audio_hash(filename): | ||||
|     try: | ||||
|         fp = acoustid.fingerprint_file(filename)[1] | ||||
|         values, _ = chromaprint.decode_fingerprint(fp) | ||||
|         return chromaprint.hash_fingerprint(values) | ||||
|     except acoustid.FingerprintGenerationError: | ||||
|         return None | ||||
|  | ||||
| def get_video_hash(filename): | ||||
|     try: | ||||
|         return str(VideoHash(path=filename).hash_hex) + '_video' | ||||
| @@ -65,7 +77,10 @@ for filename in filenames: | ||||
|     if filename in delete: continue | ||||
|  | ||||
|     try: | ||||
|         digest = get_image_hash(filename) or get_video_hash(filename) | ||||
|         digest = get_image_hash(filename) or get_audio_hash(filename) or get_video_hash(filename) | ||||
|     except KeyboardInterrupt: | ||||
|         print('Skipping media hashing.') | ||||
|         break | ||||
|     except BaseException as e: | ||||
|         print() | ||||
|         print('Exception', e.__class__.__name__, str(e), 'while hashing:') | ||||
| @@ -74,13 +89,36 @@ for filename in filenames: | ||||
|  | ||||
|     if not digest: continue | ||||
|  | ||||
|     if digest in hashes: | ||||
|         delete.add(filename) | ||||
|     hash_lookup[digest] = filename | ||||
|  | ||||
|     if type(digest) == int: | ||||
|         for h in audio_hashes: | ||||
|             if bin(digest ^ h).count('1') <= 5:   # TODO adjust? | ||||
|                 delete.add(filename) | ||||
|                 print() | ||||
|                 print(digest, filename, 'close to', h, hash_lookup[h]) | ||||
|                 break | ||||
|         else:  # for | ||||
|             audio_hashes.append(digest) | ||||
|     else: | ||||
|         hashes.add(digest) | ||||
|         if digest in hashes: | ||||
|             delete.add(filename) | ||||
|         else: | ||||
|             hashes.add(digest) | ||||
|  | ||||
| print() | ||||
| print('Found', len(delete), 'total duplicate files.') | ||||
| print() | ||||
| for dupe in delete: | ||||
|     print(dupe) | ||||
|  | ||||
| print() | ||||
| print('Found', len(delete), 'total duplicate files. Delete them?') | ||||
| print('ENTER to continue, ctrl+c to cancel.') | ||||
| try: | ||||
|     input() | ||||
| except KeyboardInterrupt: | ||||
|     print('\nCancelled.') | ||||
|     os._exit(0) | ||||
|  | ||||
| print('Deleting...') | ||||
| for dupe in delete: | ||||
|   | ||||
							
								
								
									
										19
									
								
								requirements.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								requirements.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,19 @@ | ||||
| audioread==3.0.0 | ||||
| Brotli==1.0.9 | ||||
| certifi==2022.12.7 | ||||
| charset-normalizer==3.1.0 | ||||
| idna==3.4 | ||||
| imagedominantcolor==1.0.1 | ||||
| ImageHash==4.3.1 | ||||
| mutagen==1.46.0 | ||||
| numpy==1.24.2 | ||||
| Pillow==9.5.0 | ||||
| pyacoustid==1.2.2 | ||||
| pycryptodomex==3.17 | ||||
| PyWavelets==1.4.1 | ||||
| requests==2.28.2 | ||||
| scipy==1.10.1 | ||||
| urllib3==1.26.15 | ||||
| videohash==3.0.1 | ||||
| websockets==11.0 | ||||
| yt-dlp==2023.3.4 | ||||
		Reference in New Issue
	
	Block a user