website

Website contents
git clone git://git.reagancfischer.dev/website.git

cleaner.py (964B)


import os
import hashlib

# Return the SHA-256 hex digest of a file, reading it in fixed-size blocks
# so large files are never loaded into memory all at once.
def calculate_hash(file_path, block_size=65536):
    hasher = hashlib.sha256()
    with open(file_path, 'rb') as file:
        while chunk := file.read(block_size):
            hasher.update(chunk)
    return hasher.hexdigest()

# Walk a directory tree and hash every file; when a hash has been seen
# before, the later file is a duplicate and is deleted, so the first-seen
# copy of each distinct file is kept.
def remove_duplicates(directory):
    file_hashes = {}
    for root, _, files in os.walk(directory):
        for filename in files:
            file_path = os.path.join(root, filename)
            file_hash = calculate_hash(file_path)
            if file_hash in file_hashes:
                print(f'duplicate found: {file_path} is the same as {file_hashes[file_hash]}')
                os.remove(file_path)
            else:
                file_hashes[file_hash] = file_path

# Remove duplicates in the 'images' directory when run as a script.
if __name__ == '__main__':
    remove_duplicates('images')
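
A quick way to sanity-check the script is to run it against a throwaway directory instead of the real 'images' tree. A minimal sketch, assuming cleaner.py is importable as a module named cleaner (the __main__ guard above keeps the import itself from deleting anything):

import os
import tempfile

from cleaner import remove_duplicates  # assumes cleaner.py is on the import path

with tempfile.TemporaryDirectory() as tmp:
    # Two files with identical contents and one unique file.
    for name, data in [('a.txt', b'same'), ('b.txt', b'same'), ('c.txt', b'other')]:
        with open(os.path.join(tmp, name), 'wb') as f:
            f.write(data)

    # Whichever of a.txt/b.txt os.walk visits second is reported and deleted.
    remove_duplicates(tmp)

    print(sorted(os.listdir(tmp)))  # two files remain: the first-seen copy and c.txt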