file-organizer/src/deduplication.py
2024-12-08 05:53:12 +01:00

32 lines
1.2 KiB
Python

import subprocess
import os
from enum import Enum
class CZKAWKA_DELETION_METHOD(Enum):
    """Codes passed to the czkawka CLI through its ``--delete-method`` option.

    Each member's value is the literal string placed on the command line.
    The per-member descriptions below follow the member names; consult the
    czkawka CLI help for the authoritative meaning of each code.
    """

    # Keep only the newest file of each duplicate group.
    ALL_EXCEPT_NEWEST = "AEN"
    # Keep only the oldest file of each duplicate group.
    ALL_EXCEPT_OLDEST = "AEO"
    # Keep only the biggest file of each duplicate group.
    ALL_EXCEPT_BIGGEST = "AEB"
    # Keep only the smallest file of each duplicate group.
    ALL_EXCEPT_SMALLEST = "AES"
    # Delete nothing.
    NONE = "NONE"
def deduplicate_files(target_dir, exclude_files, *, delete_method=None):
    """Remove duplicate videos located directly inside ``target_dir``.

    The czkawka CLI binary is taken from the ``CZKAWKA_PATH`` environment
    variable, falling back to ``/app/vendor/czkawka/czkawka_cli``. The scan
    and deletion themselves are delegated to ``_remove_duplicates``.

    Args:
        target_dir: Directory passed to czkawka via ``--directories``.
        exclude_files: Items forwarded to czkawka's ``--excluded-items``
            option; a falsy value means nothing is excluded.
        delete_method: Optional ``CZKAWKA_DELETION_METHOD`` member choosing
            which copies are deleted. When omitted, ``ALL_EXCEPT_SMALLEST``
            is used, which was previously the hard-coded behaviour.

    Returns:
        None. ``_remove_duplicates`` reports its outcome by printing.
    """
    if delete_method is None:
        delete_method = CZKAWKA_DELETION_METHOD.ALL_EXCEPT_SMALLEST
    czkawka_path = os.environ.get("CZKAWKA_PATH", "/app/vendor/czkawka/czkawka_cli")
    # The helper returns nothing, so its result is not bound to a name.
    _remove_duplicates(czkawka_path, target_dir, exclude_files, delete_method)
def _remove_duplicates(czkawka_path, target_dir, exclude_files, delete_method: "CZKAWKA_DELETION_METHOD"):
    """Run czkawka's ``video`` command on ``target_dir`` and print the outcome.

    The command is invoked non-recursively with the requested delete method
    and a tolerance read from the ``CK_DUPLICATE_TOLERANCE`` environment
    variable (default ``"2"``). The argument list is printed before the run,
    czkawka's stdout is printed on success, and a failure message is printed
    if the process exits non-zero or cannot be started.

    Args:
        czkawka_path: Path to the czkawka CLI executable.
        target_dir: Directory passed via ``--directories``.
        exclude_files: Iterable of items for ``--excluded-items``. A single
            string, bytes or path-like value is treated as one item. A falsy
            value omits the option altogether.
        delete_method: Object whose ``value`` attribute is the code given to
            ``--delete-method`` (a ``CZKAWKA_DELETION_METHOD`` member).

    Returns:
        None.
    """
    # ``list.extend`` would splat a lone string into one-character arguments
    # (and fail outright on a path object), so wrap scalar values first.
    if isinstance(exclude_files, (str, bytes, os.PathLike)):
        exclude_files = [exclude_files]

    flags = [
        "video",
        "--directories", target_dir,
        "--not-recursive",
        "--delete-method", delete_method.value,
    ]
    if exclude_files:
        flags.append("--excluded-items")
        flags.extend(exclude_files)
    flags.append("--tolerance")
    flags.append(os.environ.get("CK_DUPLICATE_TOLERANCE", "2"))
    print(flags)

    # Only the process launch can raise the errors handled below, so keep
    # the guarded region to that single call.
    try:
        result = subprocess.run([czkawka_path, *flags], capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Failed to find duplicates: {e.stderr}")
    except OSError as e:
        # Missing or non-executable binary: report it the same best-effort
        # way as a failing run instead of crashing the caller.
        print(f"Failed to find duplicates: {e}")
    else:
        print(result.stdout)