cleaner.py (964B)
import os
import hashlib

# Calculate the SHA-256 hash of a file, reading in fixed-size blocks
# so large files do not have to fit in memory.
def calculate_hash(file_path, block_size=65536):
    hasher = hashlib.sha256()
    with open(file_path, 'rb') as file:
        buffer = file.read(block_size)
        while len(buffer) > 0:
            hasher.update(buffer)
            buffer = file.read(block_size)
    return hasher.hexdigest()

# Find and remove duplicate files: the first file seen with a given
# hash is kept, and any later file with the same content is deleted.
def remove_duplicates(directory):
    file_hashes = {}
    for root, _, files in os.walk(directory):
        for filename in files:
            file_path = os.path.join(root, filename)
            file_hash = calculate_hash(file_path)
            if file_hash in file_hashes:
                print(f'Duplicate found: {file_path} is the same as {file_hashes[file_hash]}')
                os.remove(file_path)
            else:
                file_hashes[file_hash] = file_path

# Remove duplicates in the 'images' directory
if __name__ == '__main__':
    remove_duplicates('images')
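One possible refinement, not part of the original script: files of different sizes can never be byte-identical, so grouping paths by os.path.getsize first and hashing only within same-size groups skips most of the hashing work. A minimal sketch under that assumption (find_duplicates is a hypothetical helper name; it reuses calculate_hash from above and reports pairs instead of deleting, leaving removal to the caller):

import os
from collections import defaultdict

def find_duplicates(directory):
    # Hypothetical helper: group file paths by size first, since only
    # same-size files can possibly be duplicates.
    by_size = defaultdict(list)
    for root, _, files in os.walk(directory):
        for filename in files:
            path = os.path.join(root, filename)
            by_size[os.path.getsize(path)].append(path)
    # Hash only within groups containing two or more same-size files.
    duplicates = []
    for paths in by_size.values():
        if len(paths) < 2:
            continue
        by_hash = {}
        for path in paths:
            digest = calculate_hash(path)  # function defined above
            if digest in by_hash:
                duplicates.append((path, by_hash[digest]))
            else:
                by_hash[digest] = path
    return duplicates

Returning (duplicate, original) pairs rather than calling os.remove directly makes it easy to preview the result as a dry run before deleting anything.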