"""Thin wrapper around the ``czkawka`` command-line tool for removing duplicate videos."""

import logging
import os
import subprocess
from enum import Enum

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class CZKAWKA_DELETION_METHOD(Enum):
    """Values passed to czkawka's ``--delete-method`` flag."""

    ALL_EXCEPT_NEWEST = "AEN"
    ALL_EXCEPT_OLDEST = "AEO"
    ALL_EXCEPT_BIGGEST = "AEB"
    ALL_EXCEPT_SMALLEST = "AES"
    NONE = "NONE"


def deduplicate_files(target_dir, exclude_files):
    """Run czkawka's ``video`` mode on *target_dir* and delete duplicates.

    Uses the ``czkawka`` executable found on ``PATH`` with the
    ``ALL_EXCEPT_SMALLEST`` deletion method. The tolerance is read from the
    ``CK_DUPLICATE_TOLERANCE`` environment variable and defaults to ``"2"``.

    Args:
        target_dir: Directory handed to czkawka via ``--directories``.
        exclude_files: Iterable of items, each passed as ``--excluded-items``.
    """
    czkawka_executable = "czkawka"
    czkawka_deletion_method = CZKAWKA_DELETION_METHOD.ALL_EXCEPT_SMALLEST
    czkawka_tolerance = os.environ.get("CK_DUPLICATE_TOLERANCE", "2")
    _remove_duplicates(
        czkawka_executable,
        target_dir,
        exclude_files,
        czkawka_deletion_method,
        czkawka_tolerance,
    )


def _remove_duplicates(czkawka_path, target_dir, exclude_files, delete_method, tolerance):
    """Invoke czkawka and log its output.

    A non-zero exit status is logged as an error and swallowed. Failure to
    launch the executable at all (e.g. ``FileNotFoundError``) is not caught
    and propagates to the caller.

    Args:
        czkawka_path: Executable name or path to run.
        target_dir: Directory to scan.
        exclude_files: Iterable of items to exclude (may be ``None``).
        delete_method: A ``CZKAWKA_DELETION_METHOD`` member.
        tolerance: Value for the ``--tolerance`` flag.
    """
    flags = _build_czkawka_flags(target_dir, exclude_files, delete_method, tolerance)
    try:
        result = subprocess.run(
            [czkawka_path, *flags],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        logger.error("Failed to find duplicates: %s", e.stderr)
    else:
        # Logger.log() needs an explicit level argument; the one-argument
        # call raised TypeError, so use the INFO-level convenience method.
        logger.info(result.stdout)


def _build_czkawka_flags(target_dir, exclude_files, delete_method, tolerance):
    """Build the argument list that follows the czkawka executable name.

    Args:
        target_dir: Directory passed after ``--directories``.
        exclude_files: Iterable of items; each gets its own
            ``--excluded-items`` flag. ``None`` is treated as empty.
        delete_method: Enum member whose ``.value`` follows ``--delete-method``.
        tolerance: Value for ``--tolerance``; converted to ``str`` because
            subprocess arguments must be strings.

    Returns:
        A list of command-line arguments starting with the ``video`` subcommand.
    """
    flags = [
        "video",
        "--directories",
        target_dir,
        "--not-recursive",
        "--delete-method",
        delete_method.value,
    ]
    for item in exclude_files or ():
        flags.extend(("--excluded-items", item))
    flags.extend(("--tolerance", str(tolerance)))
    return flags