file-organizer/src/deduplication.py
Jose134 bbef192a05
All checks were successful
gitea/file-organizer/pipeline/head This commit looks good
Install czkawka via download in dockerfile + fix uvicorn dependency
2025-02-02 03:06:22 +01:00

43 lines
1.5 KiB
Python

import subprocess
import os
from enum import Enum
import logging
# Module-level logger, named after this module so records can be traced to it.
logger = logging.getLogger(__name__)
# Set this logger's own threshold to INFO.
logger.setLevel(logging.INFO)
class CZKAWKA_DELETION_METHOD(Enum):
    """Deletion strategies that can be handed to the czkawka command line.

    Each member's value is the short code placed after the
    ``--delete-method`` flag when the czkawka arguments are assembled.
    """

    # Strategies that keep one file (per the member name) and delete the rest.
    ALL_EXCEPT_NEWEST = "AEN"
    ALL_EXCEPT_OLDEST = "AEO"
    ALL_EXCEPT_BIGGEST = "AEB"
    ALL_EXCEPT_SMALLEST = "AES"

    # NOTE(review): presumably means "report only, delete nothing" - confirm
    # against the czkawka CLI documentation.
    NONE = "NONE"
def deduplicate_files(target_dir, exclude_files):
    """Run czkawka duplicate removal over ``target_dir``.

    The ``czkawka`` executable is invoked by bare name (so it must be
    resolvable on ``PATH``) with the ``ALL_EXCEPT_SMALLEST`` deletion method.
    The tolerance is read from the ``CK_DUPLICATE_TOLERANCE`` environment
    variable and falls back to ``"2"`` when the variable is unset.

    Args:
        target_dir: Directory passed to czkawka as the directory to scan.
        exclude_files: Iterable of items czkawka is told to exclude.
    """
    # Kept as a string: it is forwarded verbatim as a command-line argument.
    tolerance_setting = os.environ.get("CK_DUPLICATE_TOLERANCE", "2")

    _remove_duplicates(
        "czkawka",
        target_dir,
        exclude_files,
        CZKAWKA_DELETION_METHOD.ALL_EXCEPT_SMALLEST,
        tolerance_setting,
    )
def _remove_duplicates(czkawka_path, target_dir, exclude_files, delete_method, tolerance):
    """Invoke czkawka and log its output.

    On success the captured standard output is logged at INFO level. If the
    process exits with a non-zero status, its captured standard error is
    logged at ERROR level and the failure is not re-raised.

    Args:
        czkawka_path: Name or path of the czkawka executable.
        target_dir: Directory czkawka should scan.
        exclude_files: Iterable of items czkawka should exclude.
        delete_method: ``CZKAWKA_DELETION_METHOD`` member to apply.
        tolerance: Tolerance value forwarded to czkawka as an argument.

    Raises:
        OSError: Propagated from ``subprocess.run`` when the executable
            cannot be started (for example, it is not installed).
    """
    flags = _build_czkawka_flags(target_dir, exclude_files, delete_method, tolerance)
    try:
        # Only the subprocess call can raise CalledProcessError (check=True).
        result = subprocess.run(
            [czkawka_path, *flags],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        logger.error(f"Failed to find duplicates: {e.stderr}")
    else:
        # Logger.log() requires an explicit level as its first argument; the
        # previous single-argument call raised TypeError on every successful
        # run. Use the level-specific helper instead.
        logger.info(result.stdout)
def _build_czkawka_flags(target_dir, exclude_files, delete_method, tolerance):
flags = ["video", "--directories", target_dir, "--not-recursive", "--delete-method", delete_method.value]
for file in exclude_files:
flags.append("--excluded-items")
flags.append(file)
flags.append("--tolerance")
flags.append(tolerance)
return flags